csv-utils 0.3.14 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile.lock +29 -29
- data/bin/csv-duplicate-finder +69 -0
- data/csv-utils.gemspec +1 -1
- metadata +5 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 0b511a7a2cac6926477ed165212fa2009947dc84040f7d0451f7fbd7fdd9543b
         | 
| 4 | 
            +
              data.tar.gz: 6661363cabaebcd2f21a41159f7ec0712b7729da5dde0cc7b1818415d8f0bc81
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: cb860bdbe29726e44e1af6c528623daf7acc186f48eecefdbd47530d4ae7ff3472a2c7044cef8de079e70d4d9ada0b1bfbff65923fc3fcb9c6236d3d2e9606d8
         | 
| 7 | 
            +
              data.tar.gz: fe7a67353de3c0e4dd5e36b71345abb373c19ca3e23bc0a8624f7f4147058911e487db17218c35e2ae2e77dfaed61185ca5ffea77736c410a3b18d1334514cd8
         | 
    
        data/.ruby-version
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            3. | 
| 1 | 
            +
            3.1.0
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -2,51 +2,51 @@ GEM | |
| 2 2 | 
             
              remote: http://rubygems.org/
         | 
| 3 3 | 
             
              specs:
         | 
| 4 4 | 
             
                ast (2.4.2)
         | 
| 5 | 
            -
                diff-lcs (1. | 
| 6 | 
            -
                docile (1. | 
| 7 | 
            -
                inheritance-helper (0. | 
| 8 | 
            -
                parallel (1. | 
| 9 | 
            -
                parser (3. | 
| 5 | 
            +
                diff-lcs (1.5.0)
         | 
| 6 | 
            +
                docile (1.4.0)
         | 
| 7 | 
            +
                inheritance-helper (0.2.5)
         | 
| 8 | 
            +
                parallel (1.22.1)
         | 
| 9 | 
            +
                parser (3.1.1.0)
         | 
| 10 10 | 
             
                  ast (~> 2.4.1)
         | 
| 11 | 
            -
                rainbow (3. | 
| 12 | 
            -
                rake (13.0. | 
| 13 | 
            -
                regexp_parser (2. | 
| 14 | 
            -
                rexml (3.2. | 
| 15 | 
            -
                rspec (3. | 
| 16 | 
            -
                  rspec-core (~> 3. | 
| 17 | 
            -
                  rspec-expectations (~> 3. | 
| 18 | 
            -
                  rspec-mocks (~> 3. | 
| 19 | 
            -
                rspec-core (3. | 
| 20 | 
            -
                  rspec-support (~> 3. | 
| 21 | 
            -
                rspec-expectations (3. | 
| 11 | 
            +
                rainbow (3.1.1)
         | 
| 12 | 
            +
                rake (13.0.6)
         | 
| 13 | 
            +
                regexp_parser (2.2.1)
         | 
| 14 | 
            +
                rexml (3.2.5)
         | 
| 15 | 
            +
                rspec (3.11.0)
         | 
| 16 | 
            +
                  rspec-core (~> 3.11.0)
         | 
| 17 | 
            +
                  rspec-expectations (~> 3.11.0)
         | 
| 18 | 
            +
                  rspec-mocks (~> 3.11.0)
         | 
| 19 | 
            +
                rspec-core (3.11.0)
         | 
| 20 | 
            +
                  rspec-support (~> 3.11.0)
         | 
| 21 | 
            +
                rspec-expectations (3.11.0)
         | 
| 22 22 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 23 | 
            -
                  rspec-support (~> 3. | 
| 24 | 
            -
                rspec-mocks (3. | 
| 23 | 
            +
                  rspec-support (~> 3.11.0)
         | 
| 24 | 
            +
                rspec-mocks (3.11.0)
         | 
| 25 25 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 26 | 
            -
                  rspec-support (~> 3. | 
| 27 | 
            -
                rspec-support (3. | 
| 28 | 
            -
                rubocop (1. | 
| 26 | 
            +
                  rspec-support (~> 3.11.0)
         | 
| 27 | 
            +
                rspec-support (3.11.0)
         | 
| 28 | 
            +
                rubocop (1.26.1)
         | 
| 29 29 | 
             
                  parallel (~> 1.10)
         | 
| 30 | 
            -
                  parser (>= 3. | 
| 30 | 
            +
                  parser (>= 3.1.0.0)
         | 
| 31 31 | 
             
                  rainbow (>= 2.2.2, < 4.0)
         | 
| 32 32 | 
             
                  regexp_parser (>= 1.8, < 3.0)
         | 
| 33 33 | 
             
                  rexml
         | 
| 34 | 
            -
                  rubocop-ast (>= 1. | 
| 34 | 
            +
                  rubocop-ast (>= 1.16.0, < 2.0)
         | 
| 35 35 | 
             
                  ruby-progressbar (~> 1.7)
         | 
| 36 36 | 
             
                  unicode-display_width (>= 1.4.0, < 3.0)
         | 
| 37 | 
            -
                rubocop-ast (1. | 
| 38 | 
            -
                  parser (>=  | 
| 37 | 
            +
                rubocop-ast (1.16.0)
         | 
| 38 | 
            +
                  parser (>= 3.1.1.0)
         | 
| 39 39 | 
             
                ruby-progressbar (1.11.0)
         | 
| 40 40 | 
             
                simplecov (0.21.2)
         | 
| 41 41 | 
             
                  docile (~> 1.1)
         | 
| 42 42 | 
             
                  simplecov-html (~> 0.11)
         | 
| 43 43 | 
             
                  simplecov_json_formatter (~> 0.1)
         | 
| 44 44 | 
             
                simplecov-html (0.12.3)
         | 
| 45 | 
            -
                simplecov_json_formatter (0.1. | 
| 46 | 
            -
                unicode-display_width (2. | 
| 45 | 
            +
                simplecov_json_formatter (0.1.4)
         | 
| 46 | 
            +
                unicode-display_width (2.1.0)
         | 
| 47 47 |  | 
| 48 48 | 
             
            PLATFORMS
         | 
| 49 | 
            -
              x86_64-darwin- | 
| 49 | 
            +
              x86_64-darwin-21
         | 
| 50 50 |  | 
| 51 51 | 
             
            DEPENDENCIES
         | 
| 52 52 | 
             
              inheritance-helper
         | 
| @@ -56,4 +56,4 @@ DEPENDENCIES | |
| 56 56 | 
             
              simplecov
         | 
| 57 57 |  | 
| 58 58 | 
             
            BUNDLED WITH
         | 
| 59 | 
            -
               2. | 
| 59 | 
            +
               2.3.3
         | 
    
        data/bin/csv-duplicate-finder
    ADDED
    
    | @@ -0,0 +1,69 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'optparse'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            options = {
         | 
| 6 | 
            +
              ignore_columns: []
         | 
| 7 | 
            +
            }
         | 
| 8 | 
            +
            OptionParser.new do |opts|
         | 
| 9 | 
            +
              opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file>'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
              opts.on('-h', '--help', 'Prints this help') do
         | 
| 12 | 
            +
                puts opts
         | 
| 13 | 
            +
                exit
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              opts.on('-i', '--ignore HEADERS', 'Comman separated list of headers to ignore') do |v|
         | 
| 17 | 
            +
                options[:ignore_columns] = v.split(',')
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
            end.parse!
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            require 'digest/sha2'
         | 
| 22 | 
            +
            require 'json'
         | 
| 23 | 
            +
            require 'csv-utils'
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            csv = CSVUtils::CSVIterator.new(ARGV[0])
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            missing_headers = options[:ignore_columns] - csv.first.keys
         | 
| 28 | 
            +
            unless missing_headers.empty?
         | 
| 29 | 
            +
              raise("unkown headers #{missing_headers.join(', ')} configured ingnore headers")
         | 
| 30 | 
            +
            end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            hashed_rows = {}
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            csv.each_with_index do |row, idx|
         | 
| 35 | 
            +
              options[:ignore_columns].each do |ignore_column|
         | 
| 36 | 
            +
                row[ignore_column] = ''
         | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
              key = Digest::SHA256.hexdigest(row.to_json)
         | 
| 39 | 
            +
              hashed_rows[key] ||= []
         | 
| 40 | 
            +
              hashed_rows[key] << idx
         | 
| 41 | 
            +
            end
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            duplicate_rows = {}
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            hashed_rows.reject! { |key, row_numbers| row_numbers.size <= 1 }
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            hashed_rows.each do |key, row_numbers|
         | 
| 48 | 
            +
              hashed_rows[key] = {}
         | 
| 49 | 
            +
              row_numbers.each do |row_number|
         | 
| 50 | 
            +
                duplicate_rows[row_number] = key
         | 
| 51 | 
            +
                hashed_rows[key][row_number] = nil
         | 
| 52 | 
            +
              end
         | 
| 53 | 
            +
            end
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            csv.each_with_index do |row, idx|
         | 
| 56 | 
            +
              next unless (key = duplicate_rows[idx])
         | 
| 57 | 
            +
             | 
| 58 | 
            +
              hashed_rows[key][idx] = row
         | 
| 59 | 
            +
            end
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            CSV.open('duplicates-' + File.basename(ARGV[0]), 'wb') do |out|
         | 
| 62 | 
            +
              out << ['duplicate_key', 'row_no'] + csv.first.keys
         | 
| 63 | 
            +
             | 
| 64 | 
            +
              hashed_rows.each do |key, rows|
         | 
| 65 | 
            +
                rows.each do |idx, row|
         | 
| 66 | 
            +
                  out << [key, idx] + row.values
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
              end
         | 
| 69 | 
            +
            end
         | 
    
        data/csv-utils.gemspec
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: csv-utils
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3. | 
| 4 | 
            +
              version: 0.3.15
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Doug Youch
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2022-08-25 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: inheritance-helper
         | 
| @@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files | |
| 28 28 | 
             
            email: dougyouch@gmail.com
         | 
| 29 29 | 
             
            executables:
         | 
| 30 30 | 
             
            - csv-change-eol
         | 
| 31 | 
            +
            - csv-duplicate-finder
         | 
| 31 32 | 
             
            - csv-explorer
         | 
| 32 33 | 
             
            - csv-find-error
         | 
| 33 34 | 
             
            - csv-readline
         | 
| @@ -43,6 +44,7 @@ files: | |
| 43 44 | 
             
            - LICENSE
         | 
| 44 45 | 
             
            - README.md
         | 
| 45 46 | 
             
            - bin/csv-change-eol
         | 
| 47 | 
            +
            - bin/csv-duplicate-finder
         | 
| 46 48 | 
             
            - bin/csv-explorer
         | 
| 47 49 | 
             
            - bin/csv-find-error
         | 
| 48 50 | 
             
            - bin/csv-readline
         | 
| @@ -78,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 78 80 | 
             
                - !ruby/object:Gem::Version
         | 
| 79 81 | 
             
                  version: '0'
         | 
| 80 82 | 
             
            requirements: []
         | 
| 81 | 
            -
            rubygems_version: 3. | 
| 83 | 
            +
            rubygems_version: 3.3.3
         | 
| 82 84 | 
             
            signing_key:
         | 
| 83 85 | 
             
            specification_version: 4
         | 
| 84 86 | 
             
            summary: CSV Utils
         |