csv-utils 0.3.7 → 0.3.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0248ce0ef1db03a751c864fdf52194b25a21f21ef90cbeaf8d0ae07075cbaf51'
4
- data.tar.gz: c48e4f907963b1ace89fecb095f2a7ee17ec1670db2371dd4e015005800b757b
3
+ metadata.gz: d7b43801f0f7e62d34d8a77508964f0b6ea054a3ac2c8d17bf42d22aa0b245bd
4
+ data.tar.gz: ccd3282e4c7b1e1985439e24a159fe09c570b18446adee2b8db4776bc6024852
5
5
  SHA512:
6
- metadata.gz: 848fa57de005d00bdf24cd07b533680cd9307c07a19b2fb955391737e36407dfc6be22edef55742fb0c90e8ed5273d4c584840e7d7a657404c1242947a88692b
7
- data.tar.gz: 71209d88db0d67ed606e4db56e3cf667081074523373951fea44698537af0661cf63c79eb9b971419bb13bfc2a8b1b9c10c2f89e911b913b4f6c4bc8d00ee061
6
+ metadata.gz: 1684319de3c4f728f55e1c201790dd5cc3aa110e09d14a4043fcdf3b70c02568fe898c4f2d7b839e633f461294f9870fae0a8cf2d3d785d173aa66f9c8ba2ab1
7
+ data.tar.gz: 6e7689f6f544e5280dbf144dc5a23a4b088aeff6c06931c0a856f6cd16353048e4ed343da0406550d2e7f48abd8af6e1ef31f6114ec9f3435aa56815c5fea24b
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.0.0
data/Gemfile.lock CHANGED
@@ -1,50 +1,52 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- ast (2.4.1)
5
- diff-lcs (1.3)
6
- docile (1.3.2)
4
+ ast (2.4.2)
5
+ diff-lcs (1.4.4)
6
+ docile (1.3.5)
7
7
  inheritance-helper (0.1.5)
8
- parallel (1.19.2)
9
- parser (2.7.1.4)
8
+ parallel (1.20.1)
9
+ parser (3.0.0.0)
10
10
  ast (~> 2.4.1)
11
11
  rainbow (3.0.0)
12
- rake (13.0.1)
13
- regexp_parser (1.7.1)
12
+ rake (13.0.3)
13
+ regexp_parser (2.1.1)
14
14
  rexml (3.2.4)
15
- rspec (3.9.0)
16
- rspec-core (~> 3.9.0)
17
- rspec-expectations (~> 3.9.0)
18
- rspec-mocks (~> 3.9.0)
19
- rspec-core (3.9.2)
20
- rspec-support (~> 3.9.3)
21
- rspec-expectations (3.9.2)
15
+ rspec (3.10.0)
16
+ rspec-core (~> 3.10.0)
17
+ rspec-expectations (~> 3.10.0)
18
+ rspec-mocks (~> 3.10.0)
19
+ rspec-core (3.10.1)
20
+ rspec-support (~> 3.10.0)
21
+ rspec-expectations (3.10.1)
22
22
  diff-lcs (>= 1.2.0, < 2.0)
23
- rspec-support (~> 3.9.0)
24
- rspec-mocks (3.9.1)
23
+ rspec-support (~> 3.10.0)
24
+ rspec-mocks (3.10.2)
25
25
  diff-lcs (>= 1.2.0, < 2.0)
26
- rspec-support (~> 3.9.0)
27
- rspec-support (3.9.3)
28
- rubocop (0.86.0)
26
+ rspec-support (~> 3.10.0)
27
+ rspec-support (3.10.2)
28
+ rubocop (1.11.0)
29
29
  parallel (~> 1.10)
30
- parser (>= 2.7.0.1)
30
+ parser (>= 3.0.0.0)
31
31
  rainbow (>= 2.2.2, < 4.0)
32
- regexp_parser (>= 1.7)
32
+ regexp_parser (>= 1.8, < 3.0)
33
33
  rexml
34
- rubocop-ast (>= 0.0.3, < 1.0)
34
+ rubocop-ast (>= 1.2.0, < 2.0)
35
35
  ruby-progressbar (~> 1.7)
36
- unicode-display_width (>= 1.4.0, < 2.0)
37
- rubocop-ast (0.0.3)
38
- parser (>= 2.7.0.1)
39
- ruby-progressbar (1.10.1)
40
- simplecov (0.18.5)
36
+ unicode-display_width (>= 1.4.0, < 3.0)
37
+ rubocop-ast (1.4.1)
38
+ parser (>= 2.7.1.5)
39
+ ruby-progressbar (1.11.0)
40
+ simplecov (0.21.2)
41
41
  docile (~> 1.1)
42
42
  simplecov-html (~> 0.11)
43
- simplecov-html (0.12.2)
44
- unicode-display_width (1.7.0)
43
+ simplecov_json_formatter (~> 0.1)
44
+ simplecov-html (0.12.3)
45
+ simplecov_json_formatter (0.1.2)
46
+ unicode-display_width (2.0.0)
45
47
 
46
48
  PLATFORMS
47
- ruby
49
+ x86_64-darwin-20
48
50
 
49
51
  DEPENDENCIES
50
52
  inheritance-helper
@@ -54,4 +56,4 @@ DEPENDENCIES
54
56
  simplecov
55
57
 
56
58
  BUNDLED WITH
57
- 1.17.3
59
+ 2.2.3
data/bin/csv-validator CHANGED
@@ -35,20 +35,24 @@ id_column_name = ARGV[1]
35
35
  headers = csv.shift
36
36
  strip_bom!(headers[0])
37
37
 
38
- id_column_num = nil
39
- if id_column_name
40
- unless headers.include?(id_column_name)
41
- $stderr.puts("header #{id_column_name} not found in current set of headers")
42
- exit 1
43
- end
44
-
45
- id_column_num = headers.index(id_column_name)
38
+ id_column_name ||= headers[0]
39
+ unless headers.include?(id_column_name)
40
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
41
+ exit 1
46
42
  end
47
43
 
44
+ id_column_num = headers.index(id_column_name)
45
+
48
46
  out = nil
49
- if id_column_num
50
- out = CSV.open('utf8-correctsion.csv', 'wb')
51
- out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
47
+ out_proc = Proc.new do |row|
48
+ out ||=
49
+ begin
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ out
53
+ end
54
+
55
+ out << row
52
56
  end
53
57
 
54
58
  csv_lineno = 1
@@ -66,7 +70,7 @@ while (row = csv.shift)
66
70
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
67
71
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
68
72
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
- out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
73
+ out_proc.call [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
70
74
  else
71
75
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
72
76
  end
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.7'
5
+ s.version = '0.3.12'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
data/lib/csv-utils.rb CHANGED
@@ -2,6 +2,7 @@ require 'csv'
2
2
 
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
+ autoload :CSVCompare, 'csv_utils/csv_compare'
5
6
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
7
  autoload :CSVIterator, 'csv_utils/csv_iterator'
7
8
  autoload :CSVOptions, 'csv_utils/csv_options'
@@ -0,0 +1,83 @@
# frozen_string_literal: true

module CSVUtils
  # CSVUtils::CSVCompare determines which rows of a secondary CSV file need to be
  # created, deleted or updated so that it matches a primary CSV file.
  #
  # Both CSV files must already be sorted on the column(s) that compare_proc
  # orders by; CSVUtils::CSVSort can accomplish this.
  # To receive :update events, update_comparison_columns must be configured, or
  # a subclass must override the update_row? method.
  class CSVCompare
    # primary_data_file         - path of the CSV file that is the source of truth
    # update_comparison_columns - column(s) compared for equality to detect updates,
    #                             ex: updated_at, timestamp, hash
    #                             caveat: these columns need to be in both csv files
    # compare_proc              - block used to compare the id column(s) of two records
    attr_reader :primary_data_file,
                :update_comparison_columns,
                :compare_proc

    # The block receives (src_record, dest_record) and is stored as compare_proc.
    # compare treats its result like <=>: zero means "same row", positive means
    # the primary record sorts after the secondary one, anything else means it
    # sorts before.
    def initialize(primary_data_file, update_comparison_columns = nil, &block)
      @primary_data_file = primary_data_file
      @update_comparison_columns = update_comparison_columns
      @compare_proc = block
    end

    # Walks both files in step and yields (action, record) for every difference:
    #   :create - record exists only in the primary file (primary record is yielded)
    #   :delete - record exists only in secondary_data_file (secondary record is yielded)
    #   :update - record exists in both and update_row? is true (primary record is yielded)
    # Records are Hashes of header => value, built from each file's own first row.
    #
    # Returns an Enumerator over the same (action, record) pairs when no block is given.
    def compare(secondary_data_file)
      return to_enum(:compare, secondary_data_file) unless block_given?

      src = CSV.open(primary_data_file)
      src_headers = src.shift
      dest = CSV.open(secondary_data_file)
      dest_headers = dest.shift

      src_record = next_record_from_file(src_headers, src)
      dest_record = next_record_from_file(dest_headers, dest)

      # Loop on the pending records rather than on EOF: a record that was read
      # but not yet reported must still be handled after both files hit EOF.
      while src_record || dest_record
        order = record_order(src_record, dest_record)

        if order.zero?
          yield(:update, src_record) if update_row?(src_record, dest_record)
          src_record = next_record_from_file(src_headers, src)
          dest_record = next_record_from_file(dest_headers, dest)
        elsif order.positive?
          yield :delete, dest_record
          dest_record = next_record_from_file(dest_headers, dest)
        else
          yield :create, src_record
          src_record = next_record_from_file(src_headers, src)
        end
      end
    ensure
      # Close both files even when the caller's block or compare_proc raises.
      src&.close
      dest&.close
    end

    private

    # Orders the two pending records. An exhausted primary file sorts last
    # (remaining secondary rows are deletes); an exhausted secondary file sorts
    # first (remaining primary rows are creates). Otherwise defers to
    # compare_proc, which is called exactly once per pair.
    def record_order(src_record, dest_record)
      return 1 unless src_record
      return -1 unless dest_record

      compare_proc.call(src_record, dest_record)
    end

    # Reads the next row from file and pairs it with headers.
    # Returns nil once the file has no more rows.
    def next_record_from_file(headers, file)
      row = file.shift
      return nil unless row

      Hash[headers.zip(row)]
    end

    # True when any of update_comparison_columns differs between the two
    # records. A single column name is accepted as well as a list; with no
    # columns configured nothing is ever reported as an update.
    def update_row?(src_record, dest_record)
      Array(update_comparison_columns).any? do |column_name|
        src_record[column_name] != dest_record[column_name]
      end
    end
  end
end
@@ -14,7 +14,7 @@ class CSVUtils::CSVIterator
14
14
  end
15
15
 
16
16
  def to_pretty_s
17
- reject { |_, v| v.strip.empty? }
17
+ reject { |_, v| v.nil? || v.strip.empty? }
18
18
  .each_with_index
19
19
  .map { |(k, v), idx| sprintf(' %-3d %s: %s', idx+1, k, v) }
20
20
  .join("\n") + "\n"
@@ -43,6 +43,26 @@ class CSVUtils::CSVIterator
43
43
  end
44
44
  end
45
45
 
# Returns the column names, taken from the keys of the first row.
# NOTE(review): relies on `first` (presumably Enumerable, via this class's
# `each`) returning a Hash; with no rows `first` would be nil — confirm.
def headers
  first.keys
end

# Builds a Hash that maps each row's +key+ column to a value.
#
# key   - header whose column supplies the hash keys
# value - optional header whose column supplies the hash values; when omitted
#         the given block is called with each row and its result is stored
#
# When several rows share the same key, the last one wins.
# Raises RuntimeError if +key+ or +value+ is not a known header, or if
# neither +value+ nor a block is supplied.
def to_hash(key, value = nil)
  # Look the headers up once: every call to `headers` goes through `first`.
  known_headers = headers
  raise("header #{key} not found in #{known_headers}") unless known_headers.include?(key)
  raise("headers #{value} not found in #{known_headers}") if value && !known_headers.include?(value)
  # Fail up front instead of with a LocalJumpError in the middle of iteration.
  raise('to_hash requires either a value header or a block') unless value || block_given?

  each_with_object({}) do |row, hsh|
    hsh[row[key]] = value ? row[value] : yield(row)
  end
end
65
+
46
66
  private
47
67
 
48
68
  def strip_bom!(col)
@@ -9,17 +9,18 @@ module CSVUtils
9
9
  if csv.is_a?(String)
10
10
  @must_close = true
11
11
  mode = csv_options.delete(:mode) || 'wb'
12
- CSV.open(csv, mode, csv_options)
12
+ CSV.open(csv, mode, **csv_options)
13
13
  else
14
14
  @must_close = false
15
15
  csv
16
16
  end
17
17
 
18
- generate(headers, &block) if block
18
+ add_headers(headers) if headers
19
+
20
+ generate(&block) if block
19
21
  end
20
22
 
21
- def generate(headers = nil)
22
- add_headers(headers) if headers
23
+ def generate
23
24
  yield self
24
25
  close if @must_close
25
26
  end
@@ -25,9 +25,9 @@ class CSVUtils::CSVSort
25
25
  private
26
26
 
27
27
  def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
28
- src1 = CSV.open(src_csv_file1, 'rb', csv_options)
29
- src2 = CSV.open(src_csv_file2, 'rb', csv_options)
30
- dest = CSV.open(dest_csv_file, 'wb', csv_options)
28
+ src1 = CSV.open(src_csv_file1, 'rb', **csv_options)
29
+ src2 = CSV.open(src_csv_file2, 'rb', **csv_options)
30
+ dest = CSV.open(dest_csv_file, 'wb', **csv_options)
31
31
 
32
32
  if @headers
33
33
  dest << @headers
@@ -66,7 +66,7 @@ class CSVUtils::CSVSort
66
66
  end
67
67
 
68
68
  def create_sorted_csv_part_files(batch_size, &block)
69
- src = CSV.open(csv_file, 'rb', csv_options)
69
+ src = CSV.open(csv_file, 'rb', **csv_options)
70
70
 
71
71
  @headers = src.shift if has_headers
72
72
 
@@ -74,7 +74,7 @@ class CSVUtils::CSVSort
74
74
  create_batch_part_proc = Proc.new do
75
75
  batch.sort!(&block)
76
76
  @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
77
- CSV.open(@csv_part_files.last, 'wb', csv_options) do |csv|
77
+ CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
78
78
  csv << @headers if @headers
79
79
  batch.each { |row| csv << row }
80
80
  end
@@ -107,6 +107,10 @@ class CSVUtils::CSVSort
107
107
  File.unlink(csv_part_file2)
108
108
  end
109
109
 
110
- FileUtils.mv(@csv_part_files.last, new_csv_file)
110
+ if @csv_part_files.size > 0
111
+ FileUtils.mv(@csv_part_files.last, new_csv_file)
112
+ else
113
+ FileUtils.cp(@csv_file, new_csv_file)
114
+ end
111
115
  end
112
116
  end
@@ -20,7 +20,7 @@ class CSVUtils::CSVWrapper
20
20
  def open(csv, mode, csv_options)
21
21
  if csv.is_a?(String)
22
22
  @close_when_done = true
23
- @csv = CSV.open(csv, mode, csv_options)
23
+ @csv = CSV.open(csv, mode, **csv_options)
24
24
  else
25
25
  @close_when_done = false
26
26
  @csv = csv
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-05 00:00:00.000000000 Z
11
+ date: 2021-05-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -49,6 +49,7 @@ files:
49
49
  - bin/csv-validator
50
50
  - csv-utils.gemspec
51
51
  - lib/csv-utils.rb
52
+ - lib/csv_utils/csv_compare.rb
52
53
  - lib/csv_utils/csv_extender.rb
53
54
  - lib/csv_utils/csv_iterator.rb
54
55
  - lib/csv_utils/csv_options.rb
@@ -77,7 +78,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
78
  - !ruby/object:Gem::Version
78
79
  version: '0'
79
80
  requirements: []
80
- rubygems_version: 3.0.8
81
+ rubygems_version: 3.2.3
81
82
  signing_key:
82
83
  specification_version: 4
83
84
  summary: CSV Utils