csv-utils 0.3.7 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0248ce0ef1db03a751c864fdf52194b25a21f21ef90cbeaf8d0ae07075cbaf51'
4
- data.tar.gz: c48e4f907963b1ace89fecb095f2a7ee17ec1670db2371dd4e015005800b757b
3
+ metadata.gz: d7b43801f0f7e62d34d8a77508964f0b6ea054a3ac2c8d17bf42d22aa0b245bd
4
+ data.tar.gz: ccd3282e4c7b1e1985439e24a159fe09c570b18446adee2b8db4776bc6024852
5
5
  SHA512:
6
- metadata.gz: 848fa57de005d00bdf24cd07b533680cd9307c07a19b2fb955391737e36407dfc6be22edef55742fb0c90e8ed5273d4c584840e7d7a657404c1242947a88692b
7
- data.tar.gz: 71209d88db0d67ed606e4db56e3cf667081074523373951fea44698537af0661cf63c79eb9b971419bb13bfc2a8b1b9c10c2f89e911b913b4f6c4bc8d00ee061
6
+ metadata.gz: 1684319de3c4f728f55e1c201790dd5cc3aa110e09d14a4043fcdf3b70c02568fe898c4f2d7b839e633f461294f9870fae0a8cf2d3d785d173aa66f9c8ba2ab1
7
+ data.tar.gz: 6e7689f6f544e5280dbf144dc5a23a4b088aeff6c06931c0a856f6cd16353048e4ed343da0406550d2e7f48abd8af6e1ef31f6114ec9f3435aa56815c5fea24b
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.0.0
data/Gemfile.lock CHANGED
@@ -1,50 +1,52 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.1)
5
- diff-lcs (1.3)
6
- docile (1.3.2)
4
+ ast (2.4.2)
5
+ diff-lcs (1.4.4)
6
+ docile (1.3.5)
7
7
  inheritance-helper (0.1.5)
8
- parallel (1.19.2)
9
- parser (2.7.1.4)
8
+ parallel (1.20.1)
9
+ parser (3.0.0.0)
10
10
  ast (~> 2.4.1)
11
11
  rainbow (3.0.0)
12
- rake (13.0.1)
13
- regexp_parser (1.7.1)
12
+ rake (13.0.3)
13
+ regexp_parser (2.1.1)
14
14
  rexml (3.2.4)
15
- rspec (3.9.0)
16
- rspec-core (~> 3.9.0)
17
- rspec-expectations (~> 3.9.0)
18
- rspec-mocks (~> 3.9.0)
19
- rspec-core (3.9.2)
20
- rspec-support (~> 3.9.3)
21
- rspec-expectations (3.9.2)
15
+ rspec (3.10.0)
16
+ rspec-core (~> 3.10.0)
17
+ rspec-expectations (~> 3.10.0)
18
+ rspec-mocks (~> 3.10.0)
19
+ rspec-core (3.10.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-expectations (3.10.1)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.9.0)
24
- rspec-mocks (3.9.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-mocks (3.10.2)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.9.0)
27
- rspec-support (3.9.3)
28
- rubocop (0.86.0)
26
+ rspec-support (~> 3.10.0)
27
+ rspec-support (3.10.2)
28
+ rubocop (1.11.0)
29
29
  parallel (~> 1.10)
30
- parser (>= 2.7.0.1)
30
+ parser (>= 3.0.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.7)
32
+ regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 0.0.3, < 1.0)
34
+ rubocop-ast (>= 1.2.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 2.0)
37
- rubocop-ast (0.0.3)
38
- parser (>= 2.7.0.1)
39
- ruby-progressbar (1.10.1)
40
- simplecov (0.18.5)
36
+ unicode-display_width (>= 1.4.0, < 3.0)
37
+ rubocop-ast (1.4.1)
38
+ parser (>= 2.7.1.5)
39
+ ruby-progressbar (1.11.0)
40
+ simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
- simplecov-html (0.12.2)
44
- unicode-display_width (1.7.0)
43
+ simplecov_json_formatter (~> 0.1)
44
+ simplecov-html (0.12.3)
45
+ simplecov_json_formatter (0.1.2)
46
+ unicode-display_width (2.0.0)
45
47
 
46
48
  PLATFORMS
47
- ruby
49
+ x86_64-darwin-20
48
50
 
49
51
  DEPENDENCIES
50
52
  inheritance-helper
@@ -54,4 +56,4 @@ DEPENDENCIES
54
56
  simplecov
55
57
 
56
58
  BUNDLED WITH
57
- 1.17.3
59
+ 2.2.3
data/bin/csv-validator CHANGED
@@ -35,20 +35,24 @@ id_column_name = ARGV[1]
35
35
  headers = csv.shift
36
36
  strip_bom!(headers[0])
37
37
 
38
- id_column_num = nil
39
- if id_column_name
40
- unless headers.include?(id_column_name)
41
- $stderr.puts("header #{id_column_name} not found in current set of headers")
42
- exit 1
43
- end
44
-
45
- id_column_num = headers.index(id_column_name)
38
+ id_column_name ||= headers[0]
39
+ unless headers.include?(id_column_name)
40
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
41
+ exit 1
46
42
  end
47
43
 
44
+ id_column_num = headers.index(id_column_name)
45
+
48
46
  out = nil
49
- if id_column_num
50
- out = CSV.open('utf8-correctsion.csv', 'wb')
51
- out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
47
+ out_proc = Proc.new do |row|
48
+ out ||=
49
+ begin
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ out
53
+ end
54
+
55
+ out << row
52
56
  end
53
57
 
54
58
  csv_lineno = 1
@@ -66,7 +70,7 @@ while (row = csv.shift)
66
70
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
67
71
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
68
72
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
- out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
73
+ out_proc.call [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
70
74
  else
71
75
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
72
76
  end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.7'
5
+ s.version = '0.3.12'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
data/lib/csv-utils.rb CHANGED
@@ -2,6 +2,7 @@ require 'csv'
2
2
 
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
+ autoload :CSVCompare, 'csv_utils/csv_compare'
5
6
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
7
  autoload :CSVIterator, 'csv_utils/csv_iterator'
7
8
  autoload :CSVOptions, 'csv_utils/csv_options'
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSVUtils::CSVCompare's purpose is to determine which rows in the secondary_data_file need to be created, deleted or updated
4
+ # **requires both CSV files to be sorted on the same columns; CSVUtils::CSVSort can accomplish this
5
+ # In order to receive updates, update_comparison_columns must be configured, or use inheritance and change the update_row? method
6
+ class CSVUtils::CSVCompare
7
+ # primary_data_file is the source of truth
8
+ # compare_proc used to compare the id column(s)
9
+ # update_comparison_columns column(s) to compare for equality, ex: updated_at, timestamp, hash
10
+ # caveat: update_comparison_columns need to be in both csv files
11
+ attr_reader :primary_data_file,
12
+ :update_comparison_columns,
13
+ :compare_proc
14
+
15
+ def initialize(primary_data_file, update_comparison_columns=nil, &block)
16
+ @primary_data_file = primary_data_file
17
+ @update_comparison_columns = update_comparison_columns
18
+ @compare_proc = block
19
+ end
20
+
21
+ def compare(secondary_data_file)
22
+ src = CSV.open(primary_data_file)
23
+ src_headers = src.shift
24
+ dest = CSV.open(secondary_data_file)
25
+ dest_headers = dest.shift
26
+
27
+ read_next_src = true
28
+ read_next_dest = true
29
+
30
+ while(!src.eof? || !dest.eof?)
31
+ src_record = next_record_from_file(src_headers, src) if read_next_src
32
+ dest_record = next_record_from_file(dest_headers, dest) if read_next_dest
33
+
34
+ if ! src_record
35
+ read_next_src = false
36
+ read_next_dest = true
37
+
38
+ yield :delete, dest_record
39
+ elsif ! dest_record
40
+ read_next_src = true
41
+ read_next_dest = false
42
+
43
+ yield :create, src_record
44
+ elsif compare_proc.call(src_record, dest_record) == 0
45
+ read_next_src = true
46
+ read_next_dest = true
47
+
48
+ yield(:update, src_record) if update_row?(src_record, dest_record)
49
+ elsif compare_proc.call(src_record, dest_record) > 0
50
+ read_next_src = false
51
+ read_next_dest = true
52
+
53
+ yield :delete, dest_record
54
+ else
55
+ read_next_src = true
56
+ read_next_dest = false
57
+
58
+ yield :create, src_record
59
+ end
60
+ end
61
+
62
+ src.close
63
+ dest.close
64
+ end
65
+
66
+ private
67
+
68
+ def next_record_from_file(headers, file)
69
+ return nil if file.eof?
70
+
71
+ Hash[headers.zip(file.shift)]
72
+ end
73
+
74
+ def update_row?(src_record, dest_record)
75
+ return false unless update_comparison_columns
76
+
77
+ update_comparison_columns.each do |column_name|
78
+ return true unless src_record[column_name] == dest_record[column_name]
79
+ end
80
+
81
+ false
82
+ end
83
+ end
@@ -14,7 +14,7 @@ class CSVUtils::CSVIterator
14
14
  end
15
15
 
16
16
  def to_pretty_s
17
- reject { |_, v| v.strip.empty? }
17
+ reject { |_, v| v.nil? || v.strip.empty? }
18
18
  .each_with_index
19
19
  .map { |(k, v), idx| sprintf(' %-3d %s: %s', idx+1, k, v) }
20
20
  .join("\n") + "\n"
@@ -43,6 +43,26 @@ class CSVUtils::CSVIterator
43
43
  end
44
44
  end
45
45
 
46
+ def headers
47
+ first.keys
48
+ end
49
+
50
+ def to_hash(key, value = nil)
51
+ raise("header #{key} not found in #{headers}") unless headers.include?(key)
52
+ raise("headers #{value} not found in #{headers}") if value && !headers.include?(value)
53
+
54
+ value_proc =
55
+ if value
56
+ proc { |row| row[value] }
57
+ else
58
+ proc { |row| yield(row) }
59
+ end
60
+
61
+ each_with_object({}) do |row, hsh|
62
+ hsh[row[key]] = value_proc.call(row)
63
+ end
64
+ end
65
+
46
66
  private
47
67
 
48
68
  def strip_bom!(col)
@@ -9,17 +9,18 @@ module CSVUtils
9
9
  if csv.is_a?(String)
10
10
  @must_close = true
11
11
  mode = csv_options.delete(:mode) || 'wb'
12
- CSV.open(csv, mode, csv_options)
12
+ CSV.open(csv, mode, **csv_options)
13
13
  else
14
14
  @must_close = false
15
15
  csv
16
16
  end
17
17
 
18
- generate(headers, &block) if block
18
+ add_headers(headers) if headers
19
+
20
+ generate(&block) if block
19
21
  end
20
22
 
21
- def generate(headers = nil)
22
- add_headers(headers) if headers
23
+ def generate
23
24
  yield self
24
25
  close if @must_close
25
26
  end
@@ -25,9 +25,9 @@ class CSVUtils::CSVSort
25
25
  private
26
26
 
27
27
  def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
28
- src1 = CSV.open(src_csv_file1, 'rb', csv_options)
29
- src2 = CSV.open(src_csv_file2, 'rb', csv_options)
30
- dest = CSV.open(dest_csv_file, 'wb', csv_options)
28
+ src1 = CSV.open(src_csv_file1, 'rb', **csv_options)
29
+ src2 = CSV.open(src_csv_file2, 'rb', **csv_options)
30
+ dest = CSV.open(dest_csv_file, 'wb', **csv_options)
31
31
 
32
32
  if @headers
33
33
  dest << @headers
@@ -66,7 +66,7 @@ class CSVUtils::CSVSort
66
66
  end
67
67
 
68
68
  def create_sorted_csv_part_files(batch_size, &block)
69
- src = CSV.open(csv_file, 'rb', csv_options)
69
+ src = CSV.open(csv_file, 'rb', **csv_options)
70
70
 
71
71
  @headers = src.shift if has_headers
72
72
 
@@ -74,7 +74,7 @@ class CSVUtils::CSVSort
74
74
  create_batch_part_proc = Proc.new do
75
75
  batch.sort!(&block)
76
76
  @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
77
- CSV.open(@csv_part_files.last, 'wb', csv_options) do |csv|
77
+ CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
78
78
  csv << @headers if @headers
79
79
  batch.each { |row| csv << row }
80
80
  end
@@ -107,6 +107,10 @@ class CSVUtils::CSVSort
107
107
  File.unlink(csv_part_file2)
108
108
  end
109
109
 
110
- FileUtils.mv(@csv_part_files.last, new_csv_file)
110
+ if @csv_part_files.size > 0
111
+ FileUtils.mv(@csv_part_files.last, new_csv_file)
112
+ else
113
+ FileUtils.cp(@csv_file, new_csv_file)
114
+ end
111
115
  end
112
116
  end
@@ -20,7 +20,7 @@ class CSVUtils::CSVWrapper
20
20
  def open(csv, mode, csv_options)
21
21
  if csv.is_a?(String)
22
22
  @close_when_done = true
23
- @csv = CSV.open(csv, mode, csv_options)
23
+ @csv = CSV.open(csv, mode, **csv_options)
24
24
  else
25
25
  @close_when_done = false
26
26
  @csv = csv
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-05 00:00:00.000000000 Z
11
+ date: 2021-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -49,6 +49,7 @@ files:
49
49
  - bin/csv-validator
50
50
  - csv-utils.gemspec
51
51
  - lib/csv-utils.rb
52
+ - lib/csv_utils/csv_compare.rb
52
53
  - lib/csv_utils/csv_extender.rb
53
54
  - lib/csv_utils/csv_iterator.rb
54
55
  - lib/csv_utils/csv_options.rb
@@ -77,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
78
  - !ruby/object:Gem::Version
78
79
  version: '0'
79
80
  requirements: []
80
- rubygems_version: 3.0.8
81
+ rubygems_version: 3.2.3
81
82
  signing_key:
82
83
  specification_version: 4
83
84
  summary: CSV Utils