csv-utils 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0847ad28aee7ef73e5bfa9cbd0753e42fed5097ae68bfc2083f6ed2a6d08d66
4
- data.tar.gz: '080a53543deadaa49e06616aa9f563818d3a5bbe13b76366f1f795f075be7e41'
3
+ metadata.gz: b5b6f30da64b899586ef7d801904b8da9b01b6053f9d975f0702edcc5a7b65e0
4
+ data.tar.gz: e7e5006c6b63a9b8472e4658c748071619d09adf0ca73a84109b43b8862f1bcd
5
5
  SHA512:
6
- metadata.gz: 8504ec3c569e92e3c7adc141af4077e04085b8befdad8a15c76f05405045dad52821aff1fc26d792eba4d533ea588794d9ab88f96f5c37e2cf7146624090eded
7
- data.tar.gz: f4dfa17b0ec81d5923e3ff32978f584a64d567aa8c770dd7b252770f1535ae4eba578dcec105f201ae006929114c90e1615196c6d28ac6fd36bd9a89fb978617
6
+ metadata.gz: 7ded9a4318f44f77b6c2ccf72a844a7cec734745052c62be5e19c2dbadc8e989b3c89528058fabfd63cbcb59a1575293b9cc87a343bb4d68dbad3051ba04616d
7
+ data.tar.gz: dd8f0d7cd953eee05489423d0ef55e0b486500d2499d442d7d523636ced12858a2e9cec7ad60ce9d0126516121674d3c1e2a879ea89bb3a93cf7100709204b9d
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.0.0
data/Gemfile.lock CHANGED
@@ -1,50 +1,52 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.1)
5
- diff-lcs (1.3)
6
- docile (1.3.2)
4
+ ast (2.4.2)
5
+ diff-lcs (1.4.4)
6
+ docile (1.3.5)
7
7
  inheritance-helper (0.1.5)
8
- parallel (1.19.2)
9
- parser (2.7.1.4)
8
+ parallel (1.20.1)
9
+ parser (3.0.0.0)
10
10
  ast (~> 2.4.1)
11
11
  rainbow (3.0.0)
12
- rake (13.0.1)
13
- regexp_parser (1.7.1)
12
+ rake (13.0.3)
13
+ regexp_parser (2.1.1)
14
14
  rexml (3.2.4)
15
- rspec (3.9.0)
16
- rspec-core (~> 3.9.0)
17
- rspec-expectations (~> 3.9.0)
18
- rspec-mocks (~> 3.9.0)
19
- rspec-core (3.9.2)
20
- rspec-support (~> 3.9.3)
21
- rspec-expectations (3.9.2)
15
+ rspec (3.10.0)
16
+ rspec-core (~> 3.10.0)
17
+ rspec-expectations (~> 3.10.0)
18
+ rspec-mocks (~> 3.10.0)
19
+ rspec-core (3.10.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-expectations (3.10.1)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.9.0)
24
- rspec-mocks (3.9.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-mocks (3.10.2)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.9.0)
27
- rspec-support (3.9.3)
28
- rubocop (0.86.0)
26
+ rspec-support (~> 3.10.0)
27
+ rspec-support (3.10.2)
28
+ rubocop (1.11.0)
29
29
  parallel (~> 1.10)
30
- parser (>= 2.7.0.1)
30
+ parser (>= 3.0.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.7)
32
+ regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 0.0.3, < 1.0)
34
+ rubocop-ast (>= 1.2.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 2.0)
37
- rubocop-ast (0.0.3)
38
- parser (>= 2.7.0.1)
39
- ruby-progressbar (1.10.1)
40
- simplecov (0.18.5)
36
+ unicode-display_width (>= 1.4.0, < 3.0)
37
+ rubocop-ast (1.4.1)
38
+ parser (>= 2.7.1.5)
39
+ ruby-progressbar (1.11.0)
40
+ simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
- simplecov-html (0.12.2)
44
- unicode-display_width (1.7.0)
43
+ simplecov_json_formatter (~> 0.1)
44
+ simplecov-html (0.12.3)
45
+ simplecov_json_formatter (0.1.2)
46
+ unicode-display_width (2.0.0)
45
47
 
46
48
  PLATFORMS
47
- ruby
49
+ x86_64-darwin-20
48
50
 
49
51
  DEPENDENCIES
50
52
  inheritance-helper
@@ -54,4 +56,4 @@ DEPENDENCIES
54
56
  simplecov
55
57
 
56
58
  BUNDLED WITH
57
- 1.17.3
59
+ 2.2.3
data/bin/csv-validator CHANGED
@@ -35,20 +35,24 @@ id_column_name = ARGV[1]
35
35
  headers = csv.shift
36
36
  strip_bom!(headers[0])
37
37
 
38
- id_column_num = nil
39
- if id_column_name
40
- unless headers.include?(id_column_name)
41
- $stderr.puts("header #{id_column_name} not found in current set of headers")
42
- exit 1
43
- end
44
-
45
- id_column_num = headers.index(id_column_name)
38
+ id_column_name ||= headers[0]
39
+ unless headers.include?(id_column_name)
40
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
41
+ exit 1
46
42
  end
47
43
 
44
+ id_column_num = headers.index(id_column_name)
45
+
48
46
  out = nil
49
- if id_column_num
50
- out = CSV.open('utf8-correctsion.csv', 'wb')
51
- out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
47
+ out_proc = Proc.new do |row|
48
+ out ||=
49
+ begin
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ out
53
+ end
54
+
55
+ out << row
52
56
  end
53
57
 
54
58
  csv_lineno = 1
@@ -66,7 +70,7 @@ while (row = csv.shift)
66
70
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
67
71
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
68
72
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
- out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
73
+ out_proc.call [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
70
74
  else
71
75
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
72
76
  end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.8'
5
+ s.version = '0.3.9'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
data/lib/csv-utils.rb CHANGED
@@ -2,6 +2,7 @@ require 'csv'
2
2
 
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
+ autoload :CSVCompare, 'csv_utils/csv_compare'
5
6
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
7
  autoload :CSVIterator, 'csv_utils/csv_iterator'
7
8
  autoload :CSVOptions, 'csv_utils/csv_options'
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ # CSVUtils::CSVCompare determines which rows in the secondary_data_file need to be created, deleted or updated
4
+ # NOTE: requires both CSV files to be sorted on the same columns; CSVUtils::CSVSort can accomplish this
5
+ # In order to receive updates, update_comparison_columns must be configured, or use inheritance and change the update_row? method
6
+ class CSVUtils::CSVCompare
7
+ # primary_data_file is the source of truth
8
+ # compare_proc used to compare the id column(s)
9
+ # update_comparison_columns column(s) to compare for equality, ex: updated_at, timestamp, hash
10
+ # caveat: update_comparison_columns need to be in both csv files
11
+ attr_reader :primary_data_file,
12
+ :update_comparison_columns,
13
+ :compare_proc
14
+
15
+ def initialize(primary_data_file, update_comparison_columns=nil, &block)
16
+ @primary_data_file = primary_data_file
17
+ @update_comparison_columns = update_comparison_columns
18
+ @compare_proc = block
19
+ end
20
+
21
+ def compare(secondary_data_file)
22
+ src = CSV.open(primary_data_file)
23
+ src_headers = src.shift
24
+ dest = CSV.open(secondary_data_file)
25
+ dest_headers = dest.shift
26
+
27
+ read_next_src = true
28
+ read_next_dest = true
29
+
30
+ while(!src.eof? || !dest.eof?)
31
+ src_record = next_record_from_file(src_headers, src) if read_next_src
32
+ dest_record = next_record_from_file(dest_headers, dest) if read_next_dest
33
+
34
+ if ! src_record
35
+ read_next_src = false
36
+ read_next_dest = true
37
+
38
+ yield :delete, dest_record
39
+ elsif ! dest_record
40
+ read_next_src = true
41
+ read_next_dest = false
42
+
43
+ yield :create, src_record
44
+ elsif compare_proc.call(src_record, dest_record) == 0
45
+ read_next_src = true
46
+ read_next_dest = true
47
+
48
+ yield(:update, src_record) if update_row?(src_record, dest_record)
49
+ elsif compare_proc.call(src_record, dest_record) > 0
50
+ read_next_src = false
51
+ read_next_dest = true
52
+
53
+ yield :delete, dest_record
54
+ else
55
+ read_next_src = true
56
+ read_next_dest = false
57
+
58
+ yield :create, src_record
59
+ end
60
+ end
61
+
62
+ src.close
63
+ dest.close
64
+ end
65
+
66
+ private
67
+
68
+ def next_record_from_file(headers, file)
69
+ return nil if file.eof?
70
+
71
+ Hash[headers.zip(file.shift)]
72
+ end
73
+
74
+ def update_row?(src_record, dest_record)
75
+ return false unless update_comparison_columns
76
+
77
+ update_comparison_columns.each do |column_name|
78
+ return true unless src_record[column_name] == dest_record[column_name]
79
+ end
80
+
81
+ false
82
+ end
83
+ end
@@ -9,7 +9,7 @@ module CSVUtils
9
9
  if csv.is_a?(String)
10
10
  @must_close = true
11
11
  mode = csv_options.delete(:mode) || 'wb'
12
- CSV.open(csv, mode, csv_options)
12
+ CSV.open(csv, mode, **csv_options)
13
13
  else
14
14
  @must_close = false
15
15
  csv
@@ -25,9 +25,9 @@ class CSVUtils::CSVSort
25
25
  private
26
26
 
27
27
  def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
28
- src1 = CSV.open(src_csv_file1, 'rb', csv_options)
29
- src2 = CSV.open(src_csv_file2, 'rb', csv_options)
30
- dest = CSV.open(dest_csv_file, 'wb', csv_options)
28
+ src1 = CSV.open(src_csv_file1, 'rb', **csv_options)
29
+ src2 = CSV.open(src_csv_file2, 'rb', **csv_options)
30
+ dest = CSV.open(dest_csv_file, 'wb', **csv_options)
31
31
 
32
32
  if @headers
33
33
  dest << @headers
@@ -66,7 +66,7 @@ class CSVUtils::CSVSort
66
66
  end
67
67
 
68
68
  def create_sorted_csv_part_files(batch_size, &block)
69
- src = CSV.open(csv_file, 'rb', csv_options)
69
+ src = CSV.open(csv_file, 'rb', **csv_options)
70
70
 
71
71
  @headers = src.shift if has_headers
72
72
 
@@ -74,7 +74,7 @@ class CSVUtils::CSVSort
74
74
  create_batch_part_proc = Proc.new do
75
75
  batch.sort!(&block)
76
76
  @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
77
- CSV.open(@csv_part_files.last, 'wb', csv_options) do |csv|
77
+ CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
78
78
  csv << @headers if @headers
79
79
  batch.each { |row| csv << row }
80
80
  end
@@ -107,6 +107,6 @@ class CSVUtils::CSVSort
107
107
  File.unlink(csv_part_file2)
108
108
  end
109
109
 
110
- FileUtils.mv(@csv_part_files.last, new_csv_file)
110
+ FileUtils.mv(@csv_part_files.last || @csv_file, new_csv_file)
111
111
  end
112
112
  end
@@ -20,7 +20,7 @@ class CSVUtils::CSVWrapper
20
20
  def open(csv, mode, csv_options)
21
21
  if csv.is_a?(String)
22
22
  @close_when_done = true
23
- @csv = CSV.open(csv, mode, csv_options)
23
+ @csv = CSV.open(csv, mode, **csv_options)
24
24
  else
25
25
  @close_when_done = false
26
26
  @csv = csv
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-05 00:00:00.000000000 Z
11
+ date: 2021-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -49,6 +49,7 @@ files:
49
49
  - bin/csv-validator
50
50
  - csv-utils.gemspec
51
51
  - lib/csv-utils.rb
52
+ - lib/csv_utils/csv_compare.rb
52
53
  - lib/csv_utils/csv_extender.rb
53
54
  - lib/csv_utils/csv_iterator.rb
54
55
  - lib/csv_utils/csv_options.rb
@@ -77,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
78
  - !ruby/object:Gem::Version
78
79
  version: '0'
79
80
  requirements: []
80
- rubygems_version: 3.0.8
81
+ rubygems_version: 3.2.3
81
82
  signing_key:
82
83
  specification_version: 4
83
84
  summary: CSV Utils