csv-utils 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0847ad28aee7ef73e5bfa9cbd0753e42fed5097ae68bfc2083f6ed2a6d08d66
4
- data.tar.gz: '080a53543deadaa49e06616aa9f563818d3a5bbe13b76366f1f795f075be7e41'
3
+ metadata.gz: b5b6f30da64b899586ef7d801904b8da9b01b6053f9d975f0702edcc5a7b65e0
4
+ data.tar.gz: e7e5006c6b63a9b8472e4658c748071619d09adf0ca73a84109b43b8862f1bcd
5
5
  SHA512:
6
- metadata.gz: 8504ec3c569e92e3c7adc141af4077e04085b8befdad8a15c76f05405045dad52821aff1fc26d792eba4d533ea588794d9ab88f96f5c37e2cf7146624090eded
7
- data.tar.gz: f4dfa17b0ec81d5923e3ff32978f584a64d567aa8c770dd7b252770f1535ae4eba578dcec105f201ae006929114c90e1615196c6d28ac6fd36bd9a89fb978617
6
+ metadata.gz: 7ded9a4318f44f77b6c2ccf72a844a7cec734745052c62be5e19c2dbadc8e989b3c89528058fabfd63cbcb59a1575293b9cc87a343bb4d68dbad3051ba04616d
7
+ data.tar.gz: dd8f0d7cd953eee05489423d0ef55e0b486500d2499d442d7d523636ced12858a2e9cec7ad60ce9d0126516121674d3c1e2a879ea89bb3a93cf7100709204b9d
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.0.0
data/Gemfile.lock CHANGED
@@ -1,50 +1,52 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.1)
5
- diff-lcs (1.3)
6
- docile (1.3.2)
4
+ ast (2.4.2)
5
+ diff-lcs (1.4.4)
6
+ docile (1.3.5)
7
7
  inheritance-helper (0.1.5)
8
- parallel (1.19.2)
9
- parser (2.7.1.4)
8
+ parallel (1.20.1)
9
+ parser (3.0.0.0)
10
10
  ast (~> 2.4.1)
11
11
  rainbow (3.0.0)
12
- rake (13.0.1)
13
- regexp_parser (1.7.1)
12
+ rake (13.0.3)
13
+ regexp_parser (2.1.1)
14
14
  rexml (3.2.4)
15
- rspec (3.9.0)
16
- rspec-core (~> 3.9.0)
17
- rspec-expectations (~> 3.9.0)
18
- rspec-mocks (~> 3.9.0)
19
- rspec-core (3.9.2)
20
- rspec-support (~> 3.9.3)
21
- rspec-expectations (3.9.2)
15
+ rspec (3.10.0)
16
+ rspec-core (~> 3.10.0)
17
+ rspec-expectations (~> 3.10.0)
18
+ rspec-mocks (~> 3.10.0)
19
+ rspec-core (3.10.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-expectations (3.10.1)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.9.0)
24
- rspec-mocks (3.9.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-mocks (3.10.2)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.9.0)
27
- rspec-support (3.9.3)
28
- rubocop (0.86.0)
26
+ rspec-support (~> 3.10.0)
27
+ rspec-support (3.10.2)
28
+ rubocop (1.11.0)
29
29
  parallel (~> 1.10)
30
- parser (>= 2.7.0.1)
30
+ parser (>= 3.0.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.7)
32
+ regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 0.0.3, < 1.0)
34
+ rubocop-ast (>= 1.2.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 2.0)
37
- rubocop-ast (0.0.3)
38
- parser (>= 2.7.0.1)
39
- ruby-progressbar (1.10.1)
40
- simplecov (0.18.5)
36
+ unicode-display_width (>= 1.4.0, < 3.0)
37
+ rubocop-ast (1.4.1)
38
+ parser (>= 2.7.1.5)
39
+ ruby-progressbar (1.11.0)
40
+ simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
- simplecov-html (0.12.2)
44
- unicode-display_width (1.7.0)
43
+ simplecov_json_formatter (~> 0.1)
44
+ simplecov-html (0.12.3)
45
+ simplecov_json_formatter (0.1.2)
46
+ unicode-display_width (2.0.0)
45
47
 
46
48
  PLATFORMS
47
- ruby
49
+ x86_64-darwin-20
48
50
 
49
51
  DEPENDENCIES
50
52
  inheritance-helper
@@ -54,4 +56,4 @@ DEPENDENCIES
54
56
  simplecov
55
57
 
56
58
  BUNDLED WITH
57
- 1.17.3
59
+ 2.2.3
data/bin/csv-validator CHANGED
@@ -35,20 +35,24 @@ id_column_name = ARGV[1]
35
35
  headers = csv.shift
36
36
  strip_bom!(headers[0])
37
37
 
38
- id_column_num = nil
39
- if id_column_name
40
- unless headers.include?(id_column_name)
41
- $stderr.puts("header #{id_column_name} not found in current set of headers")
42
- exit 1
43
- end
44
-
45
- id_column_num = headers.index(id_column_name)
38
+ id_column_name ||= headers[0]
39
+ unless headers.include?(id_column_name)
40
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
41
+ exit 1
46
42
  end
47
43
 
44
+ id_column_num = headers.index(id_column_name)
45
+
48
46
  out = nil
49
- if id_column_num
50
- out = CSV.open('utf8-correctsion.csv', 'wb')
51
- out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
47
+ out_proc = Proc.new do |row|
48
+ out ||=
49
+ begin
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ out
53
+ end
54
+
55
+ out << row
52
56
  end
53
57
 
54
58
  csv_lineno = 1
@@ -66,7 +70,7 @@ while (row = csv.shift)
66
70
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
67
71
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
68
72
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
- out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
73
+ out_proc.call [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
70
74
  else
71
75
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
72
76
  end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.8'
5
+ s.version = '0.3.9'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
data/lib/csv-utils.rb CHANGED
@@ -2,6 +2,7 @@ require 'csv'
2
2
 
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
+ autoload :CSVCompare, 'csv_utils/csv_compare'
5
6
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
7
  autoload :CSVIterator, 'csv_utils/csv_iterator'
7
8
  autoload :CSVOptions, 'csv_utils/csv_options'
@@ -0,0 +1,83 @@
1
# frozen_string_literal: true

# CSVUtils::CSVCompare determines which rows of a secondary CSV file need to be
# created, deleted or updated so that it matches a primary CSV file.
#
# Both CSV files must be sorted on the same id column(s), in the order implied by
# the comparison block; CSVUtils::CSVSort can accomplish this.
# In order to receive :update events, update_comparison_columns must be configured,
# or a subclass must override the update_row? method.
module CSVUtils
  class CSVCompare
    # primary_data_file is the source of truth
    # compare_proc is used to compare the id column(s) of two records; it must return
    #   0 when both records refer to the same row, a positive number when the primary
    #   record sorts after the secondary record and a negative number otherwise
    # update_comparison_columns column(s) to compare for equality, ex: updated_at, timestamp, hash
    #   caveat: update_comparison_columns need to be in both csv files
    attr_reader :primary_data_file,
                :update_comparison_columns,
                :compare_proc

    # primary_data_file         - path of the CSV file that is the source of truth
    # update_comparison_columns - optional column name, or list of column names, whose
    #                             values decide whether a matching row is an update
    # block                     - stored as compare_proc, called as block.call(src_record, dest_record)
    def initialize(primary_data_file, update_comparison_columns = nil, &block)
      @primary_data_file = primary_data_file
      @update_comparison_columns = update_comparison_columns
      @compare_proc = block
    end

    # Walks both files in lock step (the first row of each file is read as its header row)
    # and yields one (action, record) pair per difference, where record is a Hash keyed by
    # header name:
    #   :create, primary record   - the row only exists in the primary file
    #   :delete, secondary record - the row only exists in the secondary file
    #   :update, primary record   - the row exists in both files and update_row? is true
    #
    # secondary_data_file - path of the CSV file to compare against the primary file
    def compare(secondary_data_file)
      src = CSV.open(primary_data_file)
      dest = CSV.open(secondary_data_file)
      src_headers = src.shift
      dest_headers = dest.shift

      src_record = next_record_from_file(src_headers, src)
      dest_record = next_record_from_file(dest_headers, dest)

      # Loop on the pending records rather than on eof?: a record that has been read but
      # not yet yielded must still be processed after both files have hit end of file.
      while src_record || dest_record
        order = record_order(src_record, dest_record)

        if order == 0
          yield(:update, src_record) if update_row?(src_record, dest_record)
          src_record = next_record_from_file(src_headers, src)
          dest_record = next_record_from_file(dest_headers, dest)
        elsif order > 0
          yield :delete, dest_record
          dest_record = next_record_from_file(dest_headers, dest)
        else
          yield :create, src_record
          src_record = next_record_from_file(src_headers, src)
        end
      end

      nil
    ensure
      # Close both files even when the caller's block or compare_proc raises.
      src&.close
      dest&.close
    end

    private

    # Relative order of the two pending records. An exhausted file sorts last, so the
    # remaining records of the other file are all reported.
    def record_order(src_record, dest_record)
      return 1 unless src_record
      return -1 unless dest_record

      compare_proc.call(src_record, dest_record)
    end

    # Reads the next row of file and returns it as a Hash keyed by headers,
    # or nil once the file is exhausted.
    def next_record_from_file(headers, file)
      row = file.shift
      return nil unless row

      headers.zip(row).to_h
    end

    # True when any of the update_comparison_columns differs between the two records.
    # Always false when no update_comparison_columns are configured.
    def update_row?(src_record, dest_record)
      Array(update_comparison_columns).any? do |column_name|
        src_record[column_name] != dest_record[column_name]
      end
    end
  end
end
@@ -9,7 +9,7 @@ module CSVUtils
9
9
  if csv.is_a?(String)
10
10
  @must_close = true
11
11
  mode = csv_options.delete(:mode) || 'wb'
12
- CSV.open(csv, mode, csv_options)
12
+ CSV.open(csv, mode, **csv_options)
13
13
  else
14
14
  @must_close = false
15
15
  csv
@@ -25,9 +25,9 @@ class CSVUtils::CSVSort
25
25
  private
26
26
 
27
27
  def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
28
- src1 = CSV.open(src_csv_file1, 'rb', csv_options)
29
- src2 = CSV.open(src_csv_file2, 'rb', csv_options)
30
- dest = CSV.open(dest_csv_file, 'wb', csv_options)
28
+ src1 = CSV.open(src_csv_file1, 'rb', **csv_options)
29
+ src2 = CSV.open(src_csv_file2, 'rb', **csv_options)
30
+ dest = CSV.open(dest_csv_file, 'wb', **csv_options)
31
31
 
32
32
  if @headers
33
33
  dest << @headers
@@ -66,7 +66,7 @@ class CSVUtils::CSVSort
66
66
  end
67
67
 
68
68
  def create_sorted_csv_part_files(batch_size, &block)
69
- src = CSV.open(csv_file, 'rb', csv_options)
69
+ src = CSV.open(csv_file, 'rb', **csv_options)
70
70
 
71
71
  @headers = src.shift if has_headers
72
72
 
@@ -74,7 +74,7 @@ class CSVUtils::CSVSort
74
74
  create_batch_part_proc = Proc.new do
75
75
  batch.sort!(&block)
76
76
  @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
77
- CSV.open(@csv_part_files.last, 'wb', csv_options) do |csv|
77
+ CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
78
78
  csv << @headers if @headers
79
79
  batch.each { |row| csv << row }
80
80
  end
@@ -107,6 +107,6 @@ class CSVUtils::CSVSort
107
107
  File.unlink(csv_part_file2)
108
108
  end
109
109
 
110
- FileUtils.mv(@csv_part_files.last, new_csv_file)
110
+ FileUtils.mv(@csv_part_files.last || @csv_file, new_csv_file)
111
111
  end
112
112
  end
@@ -20,7 +20,7 @@ class CSVUtils::CSVWrapper
20
20
  def open(csv, mode, csv_options)
21
21
  if csv.is_a?(String)
22
22
  @close_when_done = true
23
- @csv = CSV.open(csv, mode, csv_options)
23
+ @csv = CSV.open(csv, mode, **csv_options)
24
24
  else
25
25
  @close_when_done = false
26
26
  @csv = csv
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-05 00:00:00.000000000 Z
11
+ date: 2021-03-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -49,6 +49,7 @@ files:
49
49
  - bin/csv-validator
50
50
  - csv-utils.gemspec
51
51
  - lib/csv-utils.rb
52
+ - lib/csv_utils/csv_compare.rb
52
53
  - lib/csv_utils/csv_extender.rb
53
54
  - lib/csv_utils/csv_iterator.rb
54
55
  - lib/csv_utils/csv_options.rb
@@ -77,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
78
  - !ruby/object:Gem::Version
78
79
  version: '0'
79
80
  requirements: []
80
- rubygems_version: 3.0.8
81
+ rubygems_version: 3.2.3
81
82
  signing_key:
82
83
  specification_version: 4
83
84
  summary: CSV Utils