csv-utils 0.3.4 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e649e8e220856a0676e01ef58811ab9df3822a1757eae4e776a45256063c510
4
- data.tar.gz: fad8bcac595659bc5d91b4ebc19ee8f18ebbdf910568e75ff812768e1c23515f
3
+ metadata.gz: b5b6f30da64b899586ef7d801904b8da9b01b6053f9d975f0702edcc5a7b65e0
4
+ data.tar.gz: e7e5006c6b63a9b8472e4658c748071619d09adf0ca73a84109b43b8862f1bcd
5
5
  SHA512:
6
- metadata.gz: 4e4a26c3494c466a0099f72db534b8bd73910444c6eadf1b1faa78213d225b6752e9e8b60818bbeabe57f4542bd235da1969a9b6a3bad968328622531f89f077
7
- data.tar.gz: d35d7a46f4f58a0fe87e10c6bcfec3ededc840d5d3c7ce0f1e5162e22f6f9ddb1516ed23a59b93877529ae326d597dfbde5089d9676393d488e476f6a9ad0924
6
+ metadata.gz: 7ded9a4318f44f77b6c2ccf72a844a7cec734745052c62be5e19c2dbadc8e989b3c89528058fabfd63cbcb59a1575293b9cc87a343bb4d68dbad3051ba04616d
7
+ data.tar.gz: dd8f0d7cd953eee05489423d0ef55e0b486500d2499d442d7d523636ced12858a2e9cec7ad60ce9d0126516121674d3c1e2a879ea89bb3a93cf7100709204b9d
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.0.0
data/Gemfile.lock CHANGED
@@ -1,50 +1,52 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.1)
5
- diff-lcs (1.3)
6
- docile (1.3.2)
4
+ ast (2.4.2)
5
+ diff-lcs (1.4.4)
6
+ docile (1.3.5)
7
7
  inheritance-helper (0.1.5)
8
- parallel (1.19.2)
9
- parser (2.7.1.4)
8
+ parallel (1.20.1)
9
+ parser (3.0.0.0)
10
10
  ast (~> 2.4.1)
11
11
  rainbow (3.0.0)
12
- rake (13.0.1)
13
- regexp_parser (1.7.1)
12
+ rake (13.0.3)
13
+ regexp_parser (2.1.1)
14
14
  rexml (3.2.4)
15
- rspec (3.9.0)
16
- rspec-core (~> 3.9.0)
17
- rspec-expectations (~> 3.9.0)
18
- rspec-mocks (~> 3.9.0)
19
- rspec-core (3.9.2)
20
- rspec-support (~> 3.9.3)
21
- rspec-expectations (3.9.2)
15
+ rspec (3.10.0)
16
+ rspec-core (~> 3.10.0)
17
+ rspec-expectations (~> 3.10.0)
18
+ rspec-mocks (~> 3.10.0)
19
+ rspec-core (3.10.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-expectations (3.10.1)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.9.0)
24
- rspec-mocks (3.9.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-mocks (3.10.2)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.9.0)
27
- rspec-support (3.9.3)
28
- rubocop (0.86.0)
26
+ rspec-support (~> 3.10.0)
27
+ rspec-support (3.10.2)
28
+ rubocop (1.11.0)
29
29
  parallel (~> 1.10)
30
- parser (>= 2.7.0.1)
30
+ parser (>= 3.0.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.7)
32
+ regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 0.0.3, < 1.0)
34
+ rubocop-ast (>= 1.2.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 2.0)
37
- rubocop-ast (0.0.3)
38
- parser (>= 2.7.0.1)
39
- ruby-progressbar (1.10.1)
40
- simplecov (0.18.5)
36
+ unicode-display_width (>= 1.4.0, < 3.0)
37
+ rubocop-ast (1.4.1)
38
+ parser (>= 2.7.1.5)
39
+ ruby-progressbar (1.11.0)
40
+ simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
- simplecov-html (0.12.2)
44
- unicode-display_width (1.7.0)
43
+ simplecov_json_formatter (~> 0.1)
44
+ simplecov-html (0.12.3)
45
+ simplecov_json_formatter (0.1.2)
46
+ unicode-display_width (2.0.0)
45
47
 
46
48
  PLATFORMS
47
- ruby
49
+ x86_64-darwin-20
48
50
 
49
51
  DEPENDENCIES
50
52
  inheritance-helper
@@ -54,4 +56,4 @@ DEPENDENCIES
54
56
  simplecov
55
57
 
56
58
  BUNDLED WITH
57
- 1.17.3
59
+ 2.2.3
data/bin/csv-explorer ADDED
@@ -0,0 +1,12 @@
#!/usr/bin/env ruby

# Interactive CSV explorer.
#
# Starts an IRB session in which the top-level helper `csv` returns a
# CSVUtils::CSVIterator built from the first command-line argument.

require 'csv-utils'
require 'irb'

# First command-line argument; it is removed from ARGV before IRB starts
# (presumably so IRB's own argument handling does not see it -- confirm).
CSV_FILE = ARGV.shift

# Returns the iterator for CSV_FILE, building it on first use and reusing the
# same instance on every later call.
def csv
  return @csv if @csv

  @csv = CSVUtils::CSVIterator.new(CSV_FILE)
end

IRB.start
data/bin/csv-find-error CHANGED
@@ -3,10 +3,12 @@
3
3
  require 'csv'
4
4
  require 'shellwords'
5
5
 
6
+ prev_row = nil
6
7
  begin
7
- CSV.open(ARGV[0], 'rb').each { }
8
+ CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
8
9
  rescue CSV::MalformedCSVError => e
9
10
  puts e.class.to_s + ': ' + e.message
11
+ puts "previous row was #{prev_row}"
10
12
  if e.message =~ /line (\d+)/
11
13
  lineno = $1.to_i
12
14
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
data/bin/csv-validator CHANGED
@@ -35,20 +35,24 @@ id_column_name = ARGV[1]
35
35
  headers = csv.shift
36
36
  strip_bom!(headers[0])
37
37
 
38
- id_column_num = nil
39
- if id_column_name
40
- unless headers.include?(id_column_name)
41
- $stderr.puts("header #{id_column_name} not found in current set of headers")
42
- exit 1
43
- end
44
-
45
- id_column_num = headers.index(id_column_name)
38
+ id_column_name ||= headers[0]
39
+ unless headers.include?(id_column_name)
40
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
41
+ exit 1
46
42
  end
47
43
 
44
+ id_column_num = headers.index(id_column_name)
45
+
48
46
  out = nil
49
- if id_column_num
50
- out = CSV.open('utf8-correctsion.csv', 'wb')
51
- out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
47
+ out_proc = Proc.new do |row|
48
+ out ||=
49
+ begin
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ out
53
+ end
54
+
55
+ out << row
52
56
  end
53
57
 
54
58
  csv_lineno = 1
@@ -61,12 +65,12 @@ while (row = csv.shift)
61
65
  end
62
66
 
63
67
  row.each_with_index do |col, idx|
64
- next if utf8?(col)
68
+ next if col.nil? || utf8?(col)
65
69
 
66
70
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
67
71
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
68
72
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
- out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
73
+ out_proc.call [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
70
74
  else
71
75
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
72
76
  end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.4'
5
+ s.version = '0.3.9'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
data/lib/csv-utils.rb CHANGED
@@ -2,7 +2,9 @@ require 'csv'
2
2
 
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
+ autoload :CSVCompare, 'csv_utils/csv_compare'
5
6
  autoload :CSVExtender, 'csv_utils/csv_extender'
7
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
8
  autoload :CSVOptions, 'csv_utils/csv_options'
7
9
  autoload :CSVReport, 'csv_utils/csv_report'
8
10
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,83 @@
# frozen_string_literal: true

# CSVUtils::CSVCompare walks two CSV files in step and reports which rows of the
# secondary file need to be created, deleted or updated to match the primary file.
#
# Both CSV files must be sorted on the same column(s); CSVUtils::CSVSort can
# accomplish this.
# To receive :update events, update_comparison_columns must be configured, or
# the class must be subclassed with update_row? overridden.
class CSVUtils::CSVCompare
  # primary_data_file is the source of truth
  # compare_proc is used to compare the id column(s); it is called with
  #   (src_record, dest_record) and must return 0 when both records identify the
  #   same row, a positive number when the primary record sorts after the
  #   secondary one, and a negative number otherwise
  # update_comparison_columns column(s) to compare for equality, ex: updated_at, timestamp, hash
  #   caveat: update_comparison_columns need to be in both csv files
  attr_reader :primary_data_file,
              :update_comparison_columns,
              :compare_proc

  def initialize(primary_data_file, update_comparison_columns = nil, &block)
    @primary_data_file = primary_data_file
    @update_comparison_columns = update_comparison_columns
    @compare_proc = block
  end

  # Compares the primary file against secondary_data_file, yielding
  # (action, record) pairs where record is a Hash keyed by header:
  #   :create, src_record   - row exists only in the primary file
  #   :delete, dest_record  - row exists only in the secondary file
  #   :update, src_record   - row exists in both and update_row? is true
  #
  # Returns an Enumerator when no block is given, nil otherwise.
  # Both files are closed even when the block raises.
  def compare(secondary_data_file)
    return enum_for(:compare, secondary_data_file) unless block_given?

    src = CSV.open(primary_data_file)
    src_headers = src.shift
    dest = CSV.open(secondary_data_file)
    dest_headers = dest.shift

    src_record = next_record_from_file(src_headers, src)
    dest_record = next_record_from_file(dest_headers, dest)

    # Loop on the records in hand rather than on eof?: a record that was read
    # but held back must still be reported after both files are exhausted.
    while src_record || dest_record
      order = record_order(src_record, dest_record)

      if order == 0
        yield(:update, src_record) if update_row?(src_record, dest_record)
        src_record = next_record_from_file(src_headers, src)
        dest_record = next_record_from_file(dest_headers, dest)
      elsif order > 0
        yield :delete, dest_record
        dest_record = next_record_from_file(dest_headers, dest)
      else
        yield :create, src_record
        src_record = next_record_from_file(src_headers, src)
      end
    end

    nil
  ensure
    src&.close
    dest&.close
  end

  private

  # Relative position of the two records: an exhausted primary file sorts
  # after everything (remaining secondary rows are deletes), an exhausted
  # secondary file sorts the other way (remaining primary rows are creates);
  # otherwise compare_proc decides, and is called exactly once per pair.
  def record_order(src_record, dest_record)
    return 1 unless src_record
    return -1 unless dest_record

    compare_proc.call(src_record, dest_record)
  end

  # Reads the next row from file and returns it as a header => value Hash,
  # or nil when there are no more rows.
  def next_record_from_file(headers, file)
    row = file.shift
    return nil unless row

    headers.zip(row).to_h
  end

  # True when any of the update_comparison_columns differs between the two
  # records. Always false when no comparison columns were configured.
  def update_row?(src_record, dest_record)
    return false unless update_comparison_columns

    # Array() lets a single column name be given without wrapping it.
    Array(update_comparison_columns).any? do |column_name|
      src_record[column_name] != dest_record[column_name]
    end
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

# Enumerates the data rows of a CSV, yielding each one as a RowWrapper: a Hash
# keyed by header that also carries the row's position in the file.
class CSVUtils::CSVIterator
  include Enumerable

  # The raw row (Array) yielded before the current one; nil while the first
  # data row is being yielded.
  attr_reader :prev_row

  # Hash of header => value for one row, plus the row's position.
  class RowWrapper < Hash
    # Position of the row counted in parsed rows, not physical lines; the
    # header row counts as 1 when it is read from the file.
    attr_accessor :lineno

    def self.create(headers, row, lineno)
      row_wrapper = RowWrapper[headers.zip(row)]
      row_wrapper.lineno = lineno
      row_wrapper
    end

    # One " N   header: value" line per non-blank column, newline terminated.
    # Values may be nil (empty CSV fields, rows shorter than the headers), so
    # they are stringified before the blank check.
    def to_pretty_s
      reject { |_, v| v.to_s.strip.empty? }
        .each_with_index
        .map { |(k, v), idx| format(' %-3d %s: %s', idx + 1, k, v) }
        .join("\n") + "\n"
    end
  end

  # src_csv     - passed to CSVUtils::CSVWrapper together with mode 'rb'
  # csv_options - passed through to CSVUtils::CSVWrapper
  def initialize(src_csv, csv_options = {})
    @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
  end

  # Rewinds the source and yields a RowWrapper for every data row.
  #
  # headers - when nil, the first row of the source is consumed as the header
  #           row (a leading UTF-8 BOM is stripped from its first column);
  #           otherwise every row is treated as data.
  #
  # Returns an Enumerator when no block is given, nil otherwise.
  def each(headers = nil)
    return enum_for(:each, headers) unless block_given?

    @src_csv.rewind
    @prev_row = nil

    lineno = 0
    unless headers
      headers = @src_csv.shift
      return unless headers # empty source: nothing to iterate

      strip_bom!(headers[0]) if headers[0]
      lineno += 1
    end

    while (row = @src_csv.shift)
      lineno += 1
      yield RowWrapper.create(headers, row, lineno)
      @prev_row = row
    end
  end

  private

  # Removes a leading UTF-8 byte order mark from col, in place.
  # The check is done on the raw bytes and the pattern is re-tagged with col's
  # own encoding, so it works for binary and for UTF-8 tagged strings alike.
  def strip_bom!(col)
    bom = "\xEF\xBB\xBF".b
    return unless col.b.start_with?(bom)

    col.sub!(bom.force_encoding(col.encoding), '')
  end
end
@@ -9,19 +9,20 @@ module CSVUtils
9
9
  if csv.is_a?(String)
10
10
  @must_close = true
11
11
  mode = csv_options.delete(:mode) || 'wb'
12
- CSV.open(csv, mode, csv_options)
12
+ CSV.open(csv, mode, **csv_options)
13
13
  else
14
14
  @must_close = false
15
15
  csv
16
16
  end
17
17
 
18
- generate(headers, &block) if block
18
+ add_headers(headers) if headers
19
+
20
+ generate(&block) if block
19
21
  end
20
22
 
21
- def generate(headers = nil)
22
- add_headers(headers) if headers
23
+ def generate
23
24
  yield self
24
- @csv.close if @must_close
25
+ close if @must_close
25
26
  end
26
27
 
27
28
  def append(csv_row)
@@ -37,5 +38,9 @@ module CSVUtils
37
38
  def add_headers(csv_row)
38
39
  append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
39
40
  end
41
+
42
+ def close
43
+ @csv.close
44
+ end
40
45
  end
41
46
  end
@@ -25,9 +25,9 @@ class CSVUtils::CSVSort
25
25
  private
26
26
 
27
27
  def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
28
- src1 = CSV.open(src_csv_file1, 'rb', csv_options)
29
- src2 = CSV.open(src_csv_file2, 'rb', csv_options)
30
- dest = CSV.open(dest_csv_file, 'wb', csv_options)
28
+ src1 = CSV.open(src_csv_file1, 'rb', **csv_options)
29
+ src2 = CSV.open(src_csv_file2, 'rb', **csv_options)
30
+ dest = CSV.open(dest_csv_file, 'wb', **csv_options)
31
31
 
32
32
  if @headers
33
33
  dest << @headers
@@ -66,7 +66,7 @@ class CSVUtils::CSVSort
66
66
  end
67
67
 
68
68
  def create_sorted_csv_part_files(batch_size, &block)
69
- src = CSV.open(csv_file, 'rb', csv_options)
69
+ src = CSV.open(csv_file, 'rb', **csv_options)
70
70
 
71
71
  @headers = src.shift if has_headers
72
72
 
@@ -74,7 +74,7 @@ class CSVUtils::CSVSort
74
74
  create_batch_part_proc = Proc.new do
75
75
  batch.sort!(&block)
76
76
  @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
77
- CSV.open(@csv_part_files.last, 'wb', csv_options) do |csv|
77
+ CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
78
78
  csv << @headers if @headers
79
79
  batch.each { |row| csv << row }
80
80
  end
@@ -107,6 +107,6 @@ class CSVUtils::CSVSort
107
107
  File.unlink(csv_part_file2)
108
108
  end
109
109
 
110
- FileUtils.mv(@csv_part_files.last, new_csv_file)
110
+ FileUtils.mv(@csv_part_files.last || @csv_file, new_csv_file)
111
111
  end
112
112
  end
@@ -20,7 +20,7 @@ class CSVUtils::CSVWrapper
20
20
  def open(csv, mode, csv_options)
21
21
  if csv.is_a?(String)
22
22
  @close_when_done = true
23
- @csv = CSV.open(csv, mode, csv_options)
23
+ @csv = CSV.open(csv, mode, **csv_options)
24
24
  else
25
25
  @close_when_done = false
26
26
  @csv = csv
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-20 00:00:00.000000000 Z
11
+ date: 2021-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
33
34
  - csv-validator
@@ -42,12 +43,15 @@ files:
42
43
  - LICENSE
43
44
  - README.md
44
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
45
47
  - bin/csv-find-error
46
48
  - bin/csv-readline
47
49
  - bin/csv-validator
48
50
  - csv-utils.gemspec
49
51
  - lib/csv-utils.rb
52
+ - lib/csv_utils/csv_compare.rb
50
53
  - lib/csv_utils/csv_extender.rb
54
+ - lib/csv_utils/csv_iterator.rb
51
55
  - lib/csv_utils/csv_options.rb
52
56
  - lib/csv_utils/csv_report.rb
53
57
  - lib/csv_utils/csv_row.rb
@@ -74,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
74
78
  - !ruby/object:Gem::Version
75
79
  version: '0'
76
80
  requirements: []
77
- rubygems_version: 3.0.8
81
+ rubygems_version: 3.2.3
78
82
  signing_key:
79
83
  specification_version: 4
80
84
  summary: CSV Utils