csv-utils 0.3.0 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
4
- data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
3
+ metadata.gz: fc87e9b60715e648c97c91c1bbe562eb72a6fd964782abf3f1468588c053537b
4
+ data.tar.gz: c368dda5d1b829a6dff9ac934e42b716c8c704d3a4605f8c4b195d1a10bcc94e
5
5
  SHA512:
6
- metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
7
- data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
6
+ metadata.gz: f8ead05ff26fcb2540403bcb47e5eacad33fcacf58f65ff48442406d0556f6dc5a23164258303a779a718ebc6bd8d232f6bad93d20d49275da05988bb0cc39c1
7
+ data.tar.gz: f5c9d16c15fc3af24947cb1aeb122962f26483cfba282a6676b1a47c8ecc9504952785898a0c94c771520b60fc3fbc84f3453c76ba9ebd896f0e19e546b23f69
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv-utils'
4
+ require 'irb'
5
+
6
# Path of the CSV file to explore: the first command-line argument,
# removed from ARGV before IRB is started.
CSV_FILE = ARGV.shift

# Convenience helper for the IRB session: builds the iterator over CSV_FILE
# on first use and hands back the same instance on later calls.
def csv
  return @csv if @csv

  @csv = CSVUtils::CSVIterator.new(CSV_FILE)
end

# Drop into an interactive session with `csv` available.
IRB.start
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ begin
5
+ require 'rchardet'
6
+ rescue LoadError
7
+ $stderr.puts 'gem install rchardet'
8
+ exit 1
9
+ end
10
+
11
# Reports whether the bytes of +str+ form valid UTF-8.
#
# NOTE: +str+ is re-tagged as UTF-8 in place (force_encoding mutates the
# receiver); the bytes themselves are not changed.
#
# @param str [String, nil] cell value; nil (an empty CSV field) has no bytes
#   to validate and is reported as valid
# @return [Boolean]
def utf8?(str)
  return true if str.nil?

  str.force_encoding('utf-8').valid_encoding?
end
16
+
17
# Re-interprets the bytes of +str+ as +current_encoding+ and transcodes them
# to UTF-8.
#
# NOTE: +str+ is re-tagged with +current_encoding+ in place; the UTF-8 result
# is a new string.
#
# @param str [String] raw cell value
# @param current_encoding [String, nil] encoding name the bytes are believed
#   to be in (nil when detection failed)
# @return [String, nil] the UTF-8 string, or nil when the encoding is missing,
#   unknown to Ruby, does not fit the bytes, or cannot be transcoded
def convert_to_utf8(str, current_encoding)
  return nil unless current_encoding

  str.force_encoding(current_encoding)
  return nil unless str.valid_encoding?

  str.encode('utf-8')
rescue ArgumentError, EncodingError
  # ArgumentError: encoding name Ruby does not know.
  # EncodingError: no converter / character not representable in UTF-8.
  nil
end
23
+
24
# Runs rchardet's detector over +col+ and returns the value stored under
# the 'encoding' key of its result.
def detect_encoding(col)
  detection = CharDet.detect(col)
  detection['encoding']
end
27
+
28
# Removes a leading UTF-8 byte order mark (EF BB BF) from +col+ in place.
#
# The check is done on raw bytes so it works whatever encoding +col+ is
# tagged with, and only a BOM at the very start of the string is removed.
#
# @param col [String, nil] first header cell (nil is tolerated)
# @return [String, nil] +col+ when a BOM was removed, nil otherwise
def strip_bom!(col)
  return nil if col.nil?

  bom = "\xEF\xBB\xBF".b
  return nil unless col.byteslice(0, bom.bytesize).b == bom

  # byteslice keeps col's encoding tag, so replace only drops the 3 BOM bytes.
  col.replace(col.byteslice(bom.bytesize, col.bytesize - bom.bytesize))
end
31
+
32
# Usage: csv-validator <csv file> [id column name]
#
# Reads the CSV in binary mode and checks every cell for valid UTF-8.
# Problems are reported on stderr; cells that could be converted from a
# detected encoding are reported on stdout. When an id column name is given,
# each converted cell is also written to a corrections CSV keyed by that id.
csv_path = ARGV[0]
id_column_name = ARGV[1]

unless csv_path
  $stderr.puts 'usage: csv-validator <csv file> [id column name]'
  exit 1
end

csv = CSV.open(csv_path, 'rb')
out = nil

begin
  headers = csv.shift
  unless headers
    $stderr.puts "#{csv_path}: no header row found"
    exit 1
  end
  # The first header may be nil (empty cell) or carry a byte order mark.
  strip_bom!(headers[0]) if headers[0]

  id_column_num = nil
  if id_column_name
    id_column_num = headers.index(id_column_name)
    unless id_column_num
      $stderr.puts("header #{id_column_name} not found in current set of headers")
      exit 1
    end
  end

  if id_column_num
    # NOTE(review): the file name looks like a typo for "corrections"; kept
    # as-is because existing tooling may already read this path.
    out = CSV.open('utf8-correctsion.csv', 'wb')
    out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
  end

  # The header row was line 1; data rows are counted from 2.
  csv_lineno = 1

  while (row = csv.shift)
    csv_lineno += 1

    unless row.size == headers.size
      $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
    end

    row.each_with_index do |col, idx|
      # Empty cells come back as nil and have nothing to validate.
      next if col.nil? || utf8?(col)

      $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""

      # Detect once; a nil result means the detector could not decide.
      detected = detect_encoding(col)
      col_utf8_encoded = detected && convert_to_utf8(col, detected)

      if col_utf8_encoded
        puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detected} \"#{col_utf8_encoded}\""
        # The corrections file only exists when an id column was requested.
        out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded] if out
      else
        $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
      end
    end
  end
ensure
  # Runs on normal completion, on `exit`, and on errors, so no handle leaks.
  csv.close
  out.close if out
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.0'
5
+ s.version = '0.3.5'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,47 @@
1
# Iterates over the rows of a CSV source, yielding each row as a
# header => value hash that also records its line number.
#
# Enumerable is mixed in, so find/select/map/etc. work on top of #each.
class CSVUtils::CSVIterator
  include Enumerable

  # A row keyed by header name, with the line number it was read at.
  class RowWrapper < Hash
    attr_accessor :lineno

    # Builds a wrapper by pairing +headers+ with the values of +row+.
    #
    # @param headers [Array] header names
    # @param row [Array] cell values for one row
    # @param lineno [Integer] line counter value for this row
    # @return [RowWrapper]
    def self.create(headers, row, lineno)
      row_wrapper = RowWrapper[headers.zip(row)]
      row_wrapper.lineno = lineno
      row_wrapper
    end

    # Renders the non-blank columns, one per line, numbered by their position
    # among the non-blank columns.
    #
    # @return [String]
    def to_pretty_s
      # to_s: empty CSV cells (and short rows padded by zip) are nil.
      reject { |_, v| v.to_s.strip.empty? }
        .each_with_index
        .map { |(k, v), idx| format(' %-3d %s: %s', idx + 1, k, v) }
        .join("\n") + "\n"
    end
  end

  # @param src_csv source handed to CSVUtils::CSVWrapper (opened in 'rb' mode)
  # @param csv_options [Hash] options forwarded to CSVUtils::CSVWrapper
  def initialize(src_csv, csv_options = {})
    @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
  end

  # Rewinds the source and yields every data row as a RowWrapper.
  #
  # @param headers [Array, nil] header names to use; when nil the first row
  #   of the source is consumed as the header row (and counted as line 1)
  # @yieldparam row [RowWrapper]
  # @return [Enumerator, nil] an Enumerator when called without a block
  def each(headers = nil)
    return to_enum(:each, headers) unless block_given?

    @src_csv.rewind

    lineno = 0
    unless headers
      headers = @src_csv.shift
      # Empty source: there is no header row and nothing to iterate.
      return unless headers

      strip_bom!(headers[0])
      lineno += 1
    end

    while (row = @src_csv.shift)
      lineno += 1
      yield RowWrapper.create(headers, row, lineno)
    end
  end

  private

  # Removes a leading UTF-8 byte order mark (EF BB BF) from +col+ in place.
  # Compared byte-wise so it works for any encoding tag; nil is tolerated.
  #
  # @return [String, nil] +col+ when a BOM was removed, nil otherwise
  def strip_bom!(col)
    return nil if col.nil?

    bom = "\xEF\xBB\xBF".b
    return nil unless col.byteslice(0, bom.bytesize).b == bom

    col.replace(col.byteslice(bom.bytesize, col.bytesize - bom.bytesize))
  end
end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
# Forwards to `rewind` on the object returned by `csv`.
# NOTE(review): `csv` is defined outside this hunk; presumably it is the
# wrapped CSV instance, so this restarts reading from the beginning. Confirm.
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-19 00:00:00.000000000 Z
11
+ date: 2020-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,8 +28,10 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
34
+ - csv-validator
33
35
  extensions: []
34
36
  extra_rdoc_files: []
35
37
  files:
@@ -41,11 +43,14 @@ files:
41
43
  - LICENSE
42
44
  - README.md
43
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
44
47
  - bin/csv-find-error
45
48
  - bin/csv-readline
49
+ - bin/csv-validator
46
50
  - csv-utils.gemspec
47
51
  - lib/csv-utils.rb
48
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
49
54
  - lib/csv_utils/csv_options.rb
50
55
  - lib/csv_utils/csv_report.rb
51
56
  - lib/csv_utils/csv_row.rb