csv-utils 0.3.2 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b3f6bdded232bf3be2009d4bbb5ab99e083cd9f48dff5fabe89324f5217550a
4
- data.tar.gz: 86b130f177d173a74bc1611c5677cc6bad5053e953a1a5fabfeabee494e372c8
3
+ metadata.gz: '0248ce0ef1db03a751c864fdf52194b25a21f21ef90cbeaf8d0ae07075cbaf51'
4
+ data.tar.gz: c48e4f907963b1ace89fecb095f2a7ee17ec1670db2371dd4e015005800b757b
5
5
  SHA512:
6
- metadata.gz: dd0a299cbe4b153f122d605bbb1b2ab08726194d58010e909090048f1cef0e384d08c8a3a59683b8108c1dc3766deacac46a9063af35e57502f6fe3c92b855ff
7
- data.tar.gz: e4233e0c38338d24a6105d465745092146a65e03f297442fabada13ecf380db70740196726ec2d40a8f92828b6cfbc6945c0274701091cb2687af549126e34fa
6
+ metadata.gz: 848fa57de005d00bdf24cd07b533680cd9307c07a19b2fb955391737e36407dfc6be22edef55742fb0c90e8ed5273d4c584840e7d7a657404c1242947a88692b
7
+ data.tar.gz: 71209d88db0d67ed606e4db56e3cf667081074523373951fea44698537af0661cf63c79eb9b971419bb13bfc2a8b1b9c10c2f89e911b913b4f6c4bc8d00ee061
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Interactive CSV explorer.
#
# Starts an IRB session in which the `csv` helper returns a
# CSVUtils::CSVIterator built for the file named on the command line.
#
# Usage: csv-explorer <csv-file>

require 'csv-utils'
require 'irb'

# ARGV.shift (not ARGV[0]) removes the path from ARGV before IRB starts.
# NOTE(review): presumably so IRB does not interpret the path as one of its
# own command-line arguments -- confirm against the IRB documentation.
CSV_FILE = ARGV.shift

# Without a file the `csv` helper could only fail later, inside the IRB
# session, with an unrelated-looking error; stop early with a usage hint.
abort "usage: #{File.basename($PROGRAM_NAME)} <csv-file>" unless CSV_FILE

# Returns the iterator for CSV_FILE, building it on first use and reusing the
# same instance for the rest of the session.
def csv
  @csv ||= CSVUtils::CSVIterator.new(CSV_FILE)
end

IRB.start
@@ -3,10 +3,12 @@
3
3
  require 'csv'
4
4
  require 'shellwords'
5
5
 
6
+ prev_row = nil
6
7
  begin
7
- CSV.open(ARGV[0], 'rb').each { }
8
+ CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
8
9
  rescue CSV::MalformedCSVError => e
9
10
  puts e.class.to_s + ': ' + e.message
11
+ puts "previous row was #{prev_row}"
10
12
  if e.message =~ /line (\d+)/
11
13
  lineno = $1.to_i
12
14
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
@@ -25,11 +25,32 @@ def detect_encoding(col)
25
25
  CharDet.detect(col)['encoding']
26
26
  end
27
27
 
28
# Removes a leading UTF-8 byte order mark (bytes EF BB BF) from +col+ in place.
#
# Only a BOM at the very start of the string is removed. The check is done on
# raw bytes, so it works for binary as well as UTF-8 encoded strings, and the
# string keeps its encoding.
#
# @param col [String, nil] first header value; nil is tolerated and ignored
# @return [String, nil] +col+ when a BOM was removed, nil otherwise (the same
#   convention as String#sub!)
def strip_bom!(col)
  return if col.nil?

  # String#b returns a new binary string, so this is safe even when string
  # literals are frozen.
  bom = "\xEF\xBB\xBF".b
  return unless col.byteslice(0, bom.bytesize).b == bom

  col.replace(col.byteslice(bom.bytesize..-1))
end
31
+
28
32
  csv = CSV.open(ARGV[0], 'rb')
29
- out = CSV.open(ARGV[1], 'wb') if ARGV[1]
33
+ id_column_name = ARGV[1]
30
34
 
31
35
  headers = csv.shift
32
- out << headers if out
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
33
54
  csv_lineno = 1
34
55
 
35
56
  while (row = csv.shift)
@@ -39,21 +60,17 @@ while (row = csv.shift)
39
60
  $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
40
61
  end
41
62
 
42
- converted = false
43
63
  row.each_with_index do |col, idx|
44
- next if utf8?(col)
64
+ next if col.nil? || utf8?(col)
45
65
 
46
- $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\""
66
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
47
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
48
- converted = true
49
- puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
50
- row[idx] = col_utf8_encoded
68
+ puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
51
70
  else
52
- $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding"
71
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
53
72
  end
54
73
  end
55
-
56
- out << row if out && converted
57
74
  end
58
75
 
59
76
  csv.close
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.2'
5
+ s.version = '0.3.7'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,51 @@
1
module CSVUtils
  # Iterates over the rows of a CSV source, yielding every data row as a
  # RowWrapper (a Hash of header => value that also carries its row number).
  # Includes Enumerable, so map/select/find/etc. are available.
  class CSVIterator
    include Enumerable

    # The raw row (Array) that was yielded before the current one during #each;
    # nil until the first row has been fully processed.
    attr_reader :prev_row

    # One CSV row as a Hash keyed by header, plus the row's position.
    class RowWrapper < Hash
      # Position of the row as counted by CSVIterator#each.
      attr_accessor :lineno

      # Builds a wrapper by pairing +headers+ with the values of +row+.
      #
      # @param headers [Array] header names, used as keys
      # @param row [Array] column values; missing trailing values become nil
      # @param lineno [Integer] position of the row
      # @return [RowWrapper]
      def self.create(headers, row, lineno)
        row_wrapper = RowWrapper[headers.zip(row)]
        row_wrapper.lineno = lineno
        row_wrapper
      end

      # Renders the non-blank columns, one numbered "header: value" line each.
      #
      # Values are converted with #to_s before the blank check, so nil values
      # (empty CSV fields, or rows shorter than the headers) are skipped
      # instead of raising.
      #
      # @return [String] the listing, always terminated by a newline
      def to_pretty_s
        reject { |_, v| v.to_s.strip.empty? }
          .each_with_index
          .map { |(k, v), idx| format(' %-3d %s: %s', idx + 1, k, v) }
          .join("\n") + "\n"
      end
    end

    # @param src_csv source handed to CSVUtils::CSVWrapper (opened in 'rb' mode)
    # @param csv_options [Hash] options forwarded to CSVUtils::CSVWrapper
    def initialize(src_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
    end

    # Rewinds the source and yields every data row as a RowWrapper.
    #
    # When +headers+ is nil the first row of the source is used as headers
    # (with a leading UTF-8 BOM removed) and counts as row 1, so the first
    # yielded row has lineno 2. When +headers+ is given, every row of the
    # source is treated as data and numbering starts at 1.
    #
    # @param headers [Array, nil] header names to use instead of the first row
    # @yieldparam row [RowWrapper]
    # @return [Enumerator, nil] an Enumerator when called without a block
    def each(headers = nil)
      return enum_for(:each, headers) unless block_given?

      @src_csv.rewind
      @prev_row = nil

      lineno = 0
      unless headers
        headers = @src_csv.shift
        # An empty source has no header row and therefore no data rows.
        return if headers.nil?

        strip_bom!(headers[0])
        lineno += 1
      end

      while (row = @src_csv.shift)
        lineno += 1
        yield RowWrapper.create(headers, row, lineno)
        @prev_row = row
      end
    end

    private

    # Removes a leading UTF-8 byte order mark (bytes EF BB BF) from +col+ in
    # place. The check is byte based, so it works for binary and UTF-8 strings;
    # nil is ignored.
    #
    # @return [String, nil] +col+ when a BOM was removed, nil otherwise
    def strip_bom!(col)
      return if col.nil?

      # String#b returns a new binary string, so this is safe even when
      # string literals are frozen.
      bom = "\xEF\xBB\xBF".b
      return unless col.byteslice(0, bom.bytesize).b == bom

      col.replace(col.byteslice(bom.bytesize..-1))
    end
  end
end
@@ -21,7 +21,7 @@ module CSVUtils
21
21
  def generate(headers = nil)
22
22
  add_headers(headers) if headers
23
23
  yield self
24
- @csv.close if @must_close
24
+ close if @must_close
25
25
  end
26
26
 
27
27
  def append(csv_row)
@@ -37,5 +37,9 @@ module CSVUtils
37
37
  def add_headers(csv_row)
38
38
  append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
39
39
  end
40
+
41
+ def close
42
+ @csv.close
43
+ end
40
44
  end
41
45
  end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-20 00:00:00.000000000 Z
11
+ date: 2020-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
33
34
  - csv-validator
@@ -42,12 +43,14 @@ files:
42
43
  - LICENSE
43
44
  - README.md
44
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
45
47
  - bin/csv-find-error
46
48
  - bin/csv-readline
47
49
  - bin/csv-validator
48
50
  - csv-utils.gemspec
49
51
  - lib/csv-utils.rb
50
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
51
54
  - lib/csv_utils/csv_options.rb
52
55
  - lib/csv_utils/csv_report.rb
53
56
  - lib/csv_utils/csv_row.rb