csv-utils 0.3.3 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a28b89b25aa7a6a90137b799c580aa9060d783f481000fd173281bc7e2367baf
4
- data.tar.gz: 7317ece2b8970a816b5e4b5b84623a2c966c3a2f7afa7c2dd30de831f7e3ac64
3
+ metadata.gz: c0847ad28aee7ef73e5bfa9cbd0753e42fed5097ae68bfc2083f6ed2a6d08d66
4
+ data.tar.gz: '080a53543deadaa49e06616aa9f563818d3a5bbe13b76366f1f795f075be7e41'
5
5
  SHA512:
6
- metadata.gz: e057cc9795559c630e4b939c67d00547a25e0cf83ed75b15713a6c381677a718dd2ac06f975486e5cdf45a1cbfd962caf2aa13105e57a544aa4b6fd177926445
7
- data.tar.gz: ee7552522db9fb683b8cbd73a94706a0a41e92135aed0c2ebec6c70754f1bcad210234b8e5b18da87322bbc6737b6f323c515a1920f09fda3230a5cdef2393d1
6
+ metadata.gz: 8504ec3c569e92e3c7adc141af4077e04085b8befdad8a15c76f05405045dad52821aff1fc26d792eba4d533ea588794d9ab88f96f5c37e2cf7146624090eded
7
+ data.tar.gz: f4dfa17b0ec81d5923e3ff32978f584a64d567aa8c770dd7b252770f1535ae4eba578dcec105f201ae006929114c90e1615196c6d28ac6fd36bd9a89fb978617
@@ -0,0 +1,12 @@
# Review notes for this hunk: every line is an addition, so this is a brand-new file.
# The gem metadata later in this diff lists bin/csv-explorer as the new executable,
# so this is presumably that script -- confirm against the packaged gem.
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv-utils'
4
+ require 'irb'
5
+
# The CSV path is taken from the first command-line argument. ARGV.shift returns nil
# when no argument was passed, and nothing below checks for that before the value is
# handed to CSVUtils::CSVIterator.new.
6
+ CSV_FILE = ARGV.shift
7
+
# Top-level helper: builds a CSVUtils::CSVIterator for CSV_FILE on first call and
# memoizes it in @csv, so repeated calls return the same iterator object.
8
+ def csv
9
+ @csv ||= CSVUtils::CSVIterator.new(CSV_FILE)
10
+ end
11
+
# Starts an interactive IRB session; presumably `csv` is meant to be called from
# inside that session -- confirm.
12
+ IRB.start
@@ -3,10 +3,12 @@
3
3
  require 'csv'
4
4
  require 'shellwords'
5
5
 
6
+ prev_row = nil
6
7
  begin
7
- CSV.open(ARGV[0], 'rb').each { }
8
+ CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
8
9
  rescue CSV::MalformedCSVError => e
9
10
  puts e.class.to_s + ': ' + e.message
11
+ puts "previous row was #{prev_row}"
10
12
  if e.message =~ /line (\d+)/
11
13
  lineno = $1.to_i
12
14
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
@@ -25,11 +25,32 @@ def detect_encoding(col)
25
25
  CharDet.detect(col)['encoding']
26
26
  end
27
27
 
# strip_bom!(col): removes the byte sequence EF BB BF (the UTF-8 byte order mark)
# from col in place using String#sub!.
# - The pattern is a plain string, not anchored to the start, so the first occurrence
#   anywhere in col is the one removed.
# - sub! returns nil when nothing matched; the caller in this hunk ignores the result.
# - col must respond to sub!; a nil first header would raise NoMethodError here.
# - The pattern is tagged ASCII-8BIT; the file is opened with 'rb' just below, so the
#   header strings are presumably binary as well -- TODO confirm.
28
+ def strip_bom!(col)
29
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
30
+ end
31
+
28
32
  csv = CSV.open(ARGV[0], 'rb')
29
- out = CSV.open(ARGV[1], 'wb') if ARGV[1]
33
+ id_column_name = ARGV[1]
30
34
 
31
35
  headers = csv.shift
32
- out << headers if out
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
33
54
  csv_lineno = 1
34
55
 
35
56
  while (row = csv.shift)
@@ -39,21 +60,17 @@ while (row = csv.shift)
39
60
  $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
40
61
  end
41
62
 
42
- converted = false
# Per-row scan: walks every column of the current row, skips nil columns (guard added
# in this version) and columns that utf8?(col) accepts, reports the rest on stderr,
# then tries to convert them with convert_to_utf8 / detect_encoding.
43
63
  row.each_with_index do |col, idx|
44
- next if utf8?(col)
64
+ next if col.nil? || utf8?(col)
45
65
 
46
66
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
47
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
48
- converted = true
49
68
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
# detect_encoding(col) is evaluated a second time in the message above.
50
- row[idx] = col_utf8_encoded
# NOTE(review): likely regression. Earlier in this diff `out` is only opened inside
# `if id_column_num`, and id_column_num stays nil unless an id column name was passed
# as ARGV[1]. The removed code guarded its write with `if out`; the added line below
# is unconditional, so without ARGV[1] it evaluates `row[nil]` / `nil << ...` and
# raises on the first convertible column instead of only reporting it.
# The row is no longer patched in place (row[idx] assignment removed); instead one
# correction record per converted column is appended:
# [id column value, row number, 1-based column number, header, converted value].
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
51
70
  else
52
71
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
53
72
  end
54
73
  end
55
-
56
- out << row if out && converted
57
74
  end
58
75
 
59
76
  csv.close
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.8'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,51 @@
# Review notes for this hunk: every line is an addition (new file; the metadata later
# in this diff lists lib/csv_utils/csv_iterator.rb as added, presumably this one).
# The class mixes in Enumerable and its #each yields one RowWrapper per data row.
# NOTE(review): the original header comment ("Search a CSV given a series of steps")
# does not obviously describe what the visible code does -- confirm or reword upstream.
1
+ # Search a CSV given a series of steps
2
+ class CSVUtils::CSVIterator
3
+ include Enumerable
4
+
# prev_row is the raw row (not a RowWrapper) processed before the current one.
5
+ attr_reader :prev_row
6
+
# RowWrapper: a Hash of header => value for one row, carrying its row number in lineno.
7
+ class RowWrapper < Hash
8
+ attr_accessor :lineno
9
+
# Builds the wrapper from headers.zip(row). Per Array#zip: columns beyond
# headers.size are dropped, and missing columns become nil values. Repeated header
# names collapse to a single key (last value wins), as with any Hash.
10
+ def self.create(headers, row, lineno)
11
+ row_wrapper = RowWrapper[headers.zip(row)]
12
+ row_wrapper.lineno = lineno
13
+ row_wrapper
14
+ end
15
+
# Renders the non-blank fields, one " N   header: value" line each, with a trailing
# newline. N is the 1-based position among the fields that survived the reject, not
# the original column number.
# NOTE(review): v.strip is called on every value; a nil value (e.g. a short row
# padded by zip in .create) raises NoMethodError here.
16
+ def to_pretty_s
17
+ reject { |_, v| v.strip.empty? }
18
+ .each_with_index
19
+ .map { |(k, v), idx| sprintf(' %-3d %s: %s', idx+1, k, v) }
20
+ .join("\n") + "\n"
21
+ end
22
+ end
23
+
# Wraps src_csv in a CSVUtils::CSVWrapper opened with mode 'rb'; csv_options is
# passed through unchanged.
24
+ def initialize(src_csv, csv_options = {})
25
+ @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
26
+ end
27
+
# Rewinds the source and yields a RowWrapper for each remaining row.
# - headers nil (default): the first row is consumed as the header row, a BOM is
#   stripped from its first cell, and it counts as lineno 1 (first data row is 2).
# - headers given: no row is consumed as a header; the first row yielded has lineno 1.
# lineno counts rows returned by shift; presumably this differs from the physical
# line number when a quoted field spans several lines -- confirm.
# NOTE(review): if shift returns nil for the header (the loop below relies on a falsy
# value at end of input, so presumably an empty source), headers[0] raises.
# NOTE(review): yield is unconditional, so calling each without a block raises
# LocalJumpError rather than returning an Enumerator.
28
+ def each(headers = nil)
29
+ @src_csv.rewind
30
+
31
+ lineno = 0
32
+ unless headers
33
+ headers = @src_csv.shift
34
+ strip_bom!(headers[0])
35
+ lineno += 1
36
+ end
37
+
# @prev_row is assigned only after the yield returns, so while the block runs
# prev_row still holds the row before the current one (nil for the first row).
38
+ @prev_row = nil
39
+ while (row = @src_csv.shift)
40
+ lineno += 1
41
+ yield RowWrapper.create(headers, row, lineno)
42
+ @prev_row = row
43
+ end
44
+ end
45
+
46
+ private
47
+
# Removes the first occurrence of the bytes EF BB BF (UTF-8 BOM) from col in place.
# The pattern is unanchored, and sub! returns nil when nothing matched (result unused).
# The same helper body also appears in another hunk of this diff.
48
+ def strip_bom!(col)
49
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
50
+ end
51
+ end
@@ -15,13 +15,14 @@ module CSVUtils
15
15
  csv
16
16
  end
17
17
 
18
- generate(headers, &block) if block
18
+ add_headers(headers) if headers
19
+
20
+ generate(&block) if block
19
21
  end
20
22
 
# generate: yields self to the caller's block, then closes the CSV when @must_close
# is set.
# NOTE(review): signature change. The removed version accepted an optional `headers`
# argument and wrote it itself; header writing now happens in the caller shown just
# above (`add_headers(headers) if headers`). Any external caller still passing an
# argument to generate will get ArgumentError with this version.
# NOTE(review): the close is not in an ensure, so an exception raised inside the
# block skips it.
21
- def generate(headers = nil)
22
- add_headers(headers) if headers
23
+ def generate
23
24
  yield self
24
- @csv.close if @must_close
# Closing now goes through the #close method added further down in this diff.
25
+ close if @must_close
25
26
  end
26
27
 
27
28
  def append(csv_row)
@@ -37,5 +38,9 @@ module CSVUtils
37
38
  def add_headers(csv_row)
38
39
  append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
39
40
  end
41
+
# close: closes the underlying @csv object unconditionally. Within this diff it is
# invoked from generate, which calls it only when @must_close is set.
42
+ def close
43
+ @csv.close
44
+ end
40
45
  end
41
46
  end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
# rewind: delegates to csv.rewind on the wrapped object, like the neighbouring shift
# and close delegators. CSVUtils::CSVIterator#each (added in this diff) calls it
# before reading, presumably so iteration restarts from the first row -- confirm.
38
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-20 00:00:00.000000000 Z
11
+ date: 2020-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
33
34
  - csv-validator
@@ -42,12 +43,14 @@ files:
42
43
  - LICENSE
43
44
  - README.md
44
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
45
47
  - bin/csv-find-error
46
48
  - bin/csv-readline
47
49
  - bin/csv-validator
48
50
  - csv-utils.gemspec
49
51
  - lib/csv-utils.rb
50
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
51
54
  - lib/csv_utils/csv_options.rb
52
55
  - lib/csv_utils/csv_report.rb
53
56
  - lib/csv_utils/csv_row.rb