csv-utils 0.3.1 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '019dcd269f036bc21e93019e567e8a0223d8436e87c56519d74af02383640bdf'
4
- data.tar.gz: 34b4e8035a533e897c395943e892de0bae16fbdc3847a4990cc0281225d21bd4
3
+ metadata.gz: d21af111b4ae04f90e8462cfbfc55fe7a61c3181bd095f2bc6e77f19bda30f9d
4
+ data.tar.gz: 0bb483d5fa060b077908e26795a3e62288931f14974b10dbfb34608ca4c6f981
5
5
  SHA512:
6
- metadata.gz: e770276baa097fa30551266882910818f331890c6e9bfd7fa92ab01654826a14ad3b67f151f2ce858c816d6d628170174fcc872038636998c15c818dc129130a
7
- data.tar.gz: a1689e7404f5d9b70f092b7cda83c8f200df0af1725d27ecc222d798bafcbb1ea1612cbff74dbef493fe427a12ff8330f7828ed608becb4a3d3cd7267179319e
6
+ metadata.gz: d81a50d94a11e97a481c081cf5ca1e7428ecee3c0815d5c6a1305f5d3cdab2110a880cb4dfd03a930feacd3558a34cac531a0ae85e9045d4826ce64e9a5c1f11
7
+ data.tar.gz: e38f2ba7976c2535a860f1315afe4417974a72d55ed60e9104eda9b41cab9122473116b50b91375396020d3a153e8b4722754bb1a0f4a010bbc68bb30938efa2
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv-utils'
4
+ require 'irb'
5
+
6
+ CSV_FILE = ARGV.shift
7
+
8
+ def csv
9
+ @csv ||= CSVUtils::CSVIterator.new(CSV_FILE)
10
+ end
11
+
12
+ IRB.start
@@ -3,10 +3,12 @@
3
3
  require 'csv'
4
4
  require 'shellwords'
5
5
 
6
+ prev_row = nil
6
7
  begin
7
- CSV.open(ARGV[0], 'rb').each { }
8
+ CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
8
9
  rescue CSV::MalformedCSVError => e
9
10
  puts e.class.to_s + ': ' + e.message
11
+ puts "previous row was #{prev_row}"
10
12
  if e.message =~ /line (\d+)/
11
13
  lineno = $1.to_i
12
14
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
@@ -1,7 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'csv'
4
- require 'rchardet'
4
+ begin
5
+ require 'rchardet'
6
+ rescue LoadError
7
+ $stderr.puts 'gem install rchardet'
8
+ exit 1
9
+ end
5
10
 
6
11
  def utf8?(str)
7
12
  str
@@ -20,9 +25,32 @@ def detect_encoding(col)
20
25
  CharDet.detect(col)['encoding']
21
26
  end
22
27
 
28
+ def strip_bom!(col)
29
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
30
+ end
31
+
23
32
  csv = CSV.open(ARGV[0], 'rb')
33
+ id_column_name = ARGV[1]
24
34
 
25
35
  headers = csv.shift
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
26
54
  csv_lineno = 1
27
55
 
28
56
  while (row = csv.shift)
@@ -33,15 +61,17 @@ while (row = csv.shift)
33
61
  end
34
62
 
35
63
  row.each_with_index do |col, idx|
36
- next if utf8?(col)
64
+ next if col.nil? || utf8?(col)
37
65
 
38
- $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\""
66
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
39
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
40
- puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
68
+ puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
41
70
  else
42
- $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding"
71
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
43
72
  end
44
73
  end
45
74
  end
46
75
 
47
76
  csv.close
77
+ out.close if out
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.1'
5
+ s.version = '0.3.6'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,51 @@
1
+ # Search a CSV given a series of steps
2
+ class CSVUtils::CSVIterator
3
+ include Enumerable
4
+
5
+ attr_reader :prev_row
6
+
7
+ class RowWrapper < Hash
8
+ attr_accessor :lineno
9
+
10
+ def self.create(headers, row, lineno)
11
+ row_wrapper = RowWrapper[headers.zip(row)]
12
+ row_wrapper.lineno = lineno
13
+ row_wrapper
14
+ end
15
+
16
+ def to_pretty_s
17
+ reject { |_, v| v.strip.empty? }
18
+ .each_with_index
19
+ .map { |(k, v), idx| sprintf(' %-3d %s: %s', idx+1, k, v) }
20
+ .join("\n") + "\n"
21
+ end
22
+ end
23
+
24
+ def initialize(src_csv, csv_options = {})
25
+ @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
26
+ end
27
+
28
+ def each(headers = nil)
29
+ @src_csv.rewind
30
+
31
+ lineno = 0
32
+ unless headers
33
+ headers = @src_csv.shift
34
+ strip_bom!(headers[0])
35
+ lineno += 1
36
+ end
37
+
38
+ @prev_row = nil
39
+ while (row = @src_csv.shift)
40
+ lineno += 1
41
+ yield RowWrapper.create(headers, row, lineno)
42
+ @prev_row = row
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def strip_bom!(col)
49
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
50
+ end
51
+ end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-19 00:00:00.000000000 Z
11
+ date: 2020-09-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
33
34
  - csv-validator
@@ -42,12 +43,14 @@ files:
42
43
  - LICENSE
43
44
  - README.md
44
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
45
47
  - bin/csv-find-error
46
48
  - bin/csv-readline
47
49
  - bin/csv-validator
48
50
  - csv-utils.gemspec
49
51
  - lib/csv-utils.rb
50
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
51
54
  - lib/csv_utils/csv_options.rb
52
55
  - lib/csv_utils/csv_report.rb
53
56
  - lib/csv_utils/csv_row.rb