csv-utils 0.3.3 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a28b89b25aa7a6a90137b799c580aa9060d783f481000fd173281bc7e2367baf
4
- data.tar.gz: 7317ece2b8970a816b5e4b5b84623a2c966c3a2f7afa7c2dd30de831f7e3ac64
3
+ metadata.gz: c0847ad28aee7ef73e5bfa9cbd0753e42fed5097ae68bfc2083f6ed2a6d08d66
4
+ data.tar.gz: '080a53543deadaa49e06616aa9f563818d3a5bbe13b76366f1f795f075be7e41'
5
5
  SHA512:
6
- metadata.gz: e057cc9795559c630e4b939c67d00547a25e0cf83ed75b15713a6c381677a718dd2ac06f975486e5cdf45a1cbfd962caf2aa13105e57a544aa4b6fd177926445
7
- data.tar.gz: ee7552522db9fb683b8cbd73a94706a0a41e92135aed0c2ebec6c70754f1bcad210234b8e5b18da87322bbc6737b6f323c515a1920f09fda3230a5cdef2393d1
6
+ metadata.gz: 8504ec3c569e92e3c7adc141af4077e04085b8befdad8a15c76f05405045dad52821aff1fc26d792eba4d533ea588794d9ab88f96f5c37e2cf7146624090eded
7
+ data.tar.gz: f4dfa17b0ec81d5923e3ff32978f584a64d567aa8c770dd7b252770f1535ae4eba578dcec105f201ae006929114c90e1615196c6d28ac6fd36bd9a89fb978617
@@ -0,0 +1,12 @@
#!/usr/bin/env ruby

# Interactive CSV explorer: starts an IRB session in which the `csv` helper
# returns a CSVUtils::CSVIterator for the file named on the command line.

require 'csv-utils'
require 'irb'

# ARGV.shift (rather than ARGV[0]) also removes the argument from ARGV before
# IRB starts -- presumably so IRB does not try to interpret it itself.
CSV_FILE = ARGV.shift

# Without a file the `csv` helper has nothing to open, so stop early with a
# usage hint instead of handing the user a broken session.
abort "usage: #{File.basename($PROGRAM_NAME)} <csv-file>" unless CSV_FILE

# Builds the iterator over CSV_FILE on first use and reuses it afterwards.
def csv
  @csv ||= CSVUtils::CSVIterator.new(CSV_FILE)
end

IRB.start
@@ -3,10 +3,12 @@
3
3
  require 'csv'
4
4
  require 'shellwords'
5
5
 
6
+ prev_row = nil
6
7
  begin
7
- CSV.open(ARGV[0], 'rb').each { }
8
+ CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
8
9
  rescue CSV::MalformedCSVError => e
9
10
  puts e.class.to_s + ': ' + e.message
11
+ puts "previous row was #{prev_row}"
10
12
  if e.message =~ /line (\d+)/
11
13
  lineno = $1.to_i
12
14
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
@@ -25,11 +25,32 @@ def detect_encoding(col)
25
25
  CharDet.detect(col)['encoding']
26
26
  end
27
27
 
# Removes a leading UTF-8 byte order mark (bytes EF BB BF) from +col+ in place.
#
# The comparison is done on raw bytes, so it works whatever encoding +col+ is
# tagged with, and only a BOM at the very start of the string is removed.
#
# @param col [String, nil] value to clean; nil (an empty first header) is ignored
# @return [String, nil] +col+ when a BOM was removed, otherwise nil
def strip_bom!(col)
  bom = "\xEF\xBB\xBF".b
  return unless col && col.b.start_with?(bom)

  # byteslice keeps col's own encoding tag; replace mutates the caller's string.
  col.replace(col.byteslice(bom.bytesize..-1))
end
31
+
28
32
  csv = CSV.open(ARGV[0], 'rb')
29
- out = CSV.open(ARGV[1], 'wb') if ARGV[1]
33
+ id_column_name = ARGV[1]
30
34
 
31
35
  headers = csv.shift
32
- out << headers if out
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
33
54
  csv_lineno = 1
34
55
 
35
56
  while (row = csv.shift)
@@ -39,21 +60,17 @@ while (row = csv.shift)
39
60
  $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
40
61
  end
41
62
 
42
- converted = false
43
63
  row.each_with_index do |col, idx|
44
- next if utf8?(col)
64
+ next if col.nil? || utf8?(col)
45
65
 
46
66
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
47
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
48
- converted = true
49
68
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
50
- row[idx] = col_utf8_encoded
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
51
70
  else
52
71
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
53
72
  end
54
73
  end
55
-
56
- out << row if out && converted
57
74
  end
58
75
 
59
76
  csv.close
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.8'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,51 @@
# The namespace is opened explicitly so this file can be loaded on its own,
# not only after the file that first defines CSVUtils.
module CSVUtils
  # Enumerable view over a CSV source. Every record after the header row is
  # yielded as a RowWrapper (header => value hash that knows its line number).
  class CSVIterator
    include Enumerable

    # UTF-8 byte order mark, kept binary so it can be compared byte-wise.
    UTF8_BOM = "\xEF\xBB\xBF".b.freeze

    # Raw row (Array) that was yielded before the current one; nil while the
    # first row is being yielded.
    attr_reader :prev_row

    # Hash of header => value for a single record, plus the record's number.
    class RowWrapper < Hash
      attr_accessor :lineno

      # Pairs +headers+ with +row+ positionally and records +lineno+.
      #
      # @param headers [Array] column names
      # @param row [Array] column values; missing trailing values become nil
      # @param lineno [Integer] record number to remember on the wrapper
      # @return [RowWrapper]
      def self.create(headers, row, lineno)
        row_wrapper = RowWrapper[headers.zip(row)]
        row_wrapper.lineno = lineno
        row_wrapper
      end

      # Renders the non-blank columns, one numbered "header: value" per line.
      # Numbering counts the printed columns only (blank ones are dropped
      # before numbering).
      #
      # @return [String] newline-terminated listing
      def to_pretty_s
        # to_s first: empty CSV fields and short rows produce nil values.
        reject { |_, v| v.to_s.strip.empty? }
          .each_with_index
          .map { |(k, v), idx| format(' %-3d %s: %s', idx + 1, k, v) }
          .join("\n") + "\n"
      end
    end

    # @param src_csv passed straight to CSVUtils::CSVWrapper.new
    # @param csv_options [Hash] passed straight to CSVUtils::CSVWrapper.new
    def initialize(src_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
    end

    # Rewinds the source and yields every data record as a RowWrapper.
    #
    # When +headers+ is not given, the first record of the source is used as
    # the header row (with a leading BOM removed from its first column) and
    # counts as record 1, so the first yielded row has lineno 2.
    #
    # @param headers [Array, nil] column names to use instead of the first record
    # @yieldparam row [RowWrapper]
    # @return [Enumerator, nil] an Enumerator when called without a block
    def each(headers = nil)
      return enum_for(:each, headers) unless block_given?

      @src_csv.rewind
      @prev_row = nil

      lineno = 0
      unless headers
        headers = @src_csv.shift
        # An empty source has no header row, hence nothing to iterate.
        return unless headers

        strip_bom!(headers[0])
        lineno += 1
      end

      while (row = @src_csv.shift)
        lineno += 1
        yield RowWrapper.create(headers, row, lineno)
        # Assigned after the yield so the block sees the row before its own.
        @prev_row = row
      end
    end

    private

    # Removes a leading UTF-8 byte order mark from +col+ in place. Compares
    # raw bytes, so the encoding +col+ is tagged with does not matter.
    #
    # @param col [String, nil] value to clean; nil is ignored
    # @return [String, nil] +col+ when a BOM was removed, otherwise nil
    def strip_bom!(col)
      return unless col && col.b.start_with?(UTF8_BOM)

      col.replace(col.byteslice(UTF8_BOM.bytesize..-1))
    end
  end
end
@@ -15,13 +15,14 @@ module CSVUtils
15
15
  csv
16
16
  end
17
17
 
18
- generate(headers, &block) if block
18
+ add_headers(headers) if headers
19
+
20
+ generate(&block) if block
19
21
  end
20
22
 
21
- def generate(headers = nil)
22
- add_headers(headers) if headers
23
+ def generate
23
24
  yield self
24
- @csv.close if @must_close
25
+ close if @must_close
25
26
  end
26
27
 
27
28
  def append(csv_row)
@@ -37,5 +38,9 @@ module CSVUtils
37
38
  def add_headers(csv_row)
38
39
  append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
39
40
  end
41
+
42
+ def close
43
+ @csv.close
44
+ end
40
45
  end
41
46
  end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-20 00:00:00.000000000 Z
11
+ date: 2020-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
33
34
  - csv-validator
@@ -42,12 +43,14 @@ files:
42
43
  - LICENSE
43
44
  - README.md
44
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
45
47
  - bin/csv-find-error
46
48
  - bin/csv-readline
47
49
  - bin/csv-validator
48
50
  - csv-utils.gemspec
49
51
  - lib/csv-utils.rb
50
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
51
54
  - lib/csv_utils/csv_options.rb
52
55
  - lib/csv_utils/csv_report.rb
53
56
  - lib/csv_utils/csv_row.rb