csv-utils 0.3.0 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
4
- data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
3
+ metadata.gz: fc87e9b60715e648c97c91c1bbe562eb72a6fd964782abf3f1468588c053537b
4
+ data.tar.gz: c368dda5d1b829a6dff9ac934e42b716c8c704d3a4605f8c4b195d1a10bcc94e
5
5
  SHA512:
6
- metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
7
- data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
6
+ metadata.gz: f8ead05ff26fcb2540403bcb47e5eacad33fcacf58f65ff48442406d0556f6dc5a23164258303a779a718ebc6bd8d232f6bad93d20d49275da05988bb0cc39c1
7
+ data.tar.gz: f5c9d16c15fc3af24947cb1aeb122962f26483cfba282a6676b1a47c8ecc9504952785898a0c94c771520b60fc3fbc84f3453c76ba9ebd896f0e19e546b23f69
@@ -0,0 +1,12 @@
1
#!/usr/bin/env ruby

# Interactive CSV explorer.
#
# Usage: csv-explorer <csv_file>
#
# Opens an IRB session in which the `csv` helper returns a
# CSVUtils::CSVIterator over the file named on the command line.

require 'csv-utils'
require 'irb'

# The file name is removed from ARGV before IRB starts
# (presumably so IRB does not try to interpret it as a script to run — confirm).
CSV_FILE = ARGV.shift

# Without a file the `csv` helper could only fail later, on first use, with an
# error unrelated to the real cause; stop early with a usage message instead.
abort 'usage: csv-explorer <csv_file>' unless CSV_FILE

# Lazily build, then reuse, the iterator over CSV_FILE.
def csv
  @csv ||= CSVUtils::CSVIterator.new(CSV_FILE)
end

IRB.start
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ begin
5
+ require 'rchardet'
6
+ rescue LoadError
7
+ $stderr.puts 'gem install rchardet'
8
+ exit 1
9
+ end
10
+
11
# Report whether +str+ consists of valid UTF-8 bytes.
#
# The string is re-tagged as UTF-8 in place (force_encoding mutates its
# receiver), so the caller's string carries the UTF-8 label afterwards.
#
# A nil field contains no bytes and is treated as valid; CSV yields nil for
# empty unquoted fields, which previously raised NoMethodError here.
#
# @param str [String, nil] field value to check
# @return [Boolean] true when the bytes form valid UTF-8 (or str is nil)
def utf8?(str)
  return true if str.nil?

  str.force_encoding('utf-8').valid_encoding?
end
16
+
17
# Reinterpret +str+ as +current_encoding+ and transcode it to UTF-8.
#
# The string is re-tagged in place (force_encoding mutates its receiver);
# the UTF-8 result is a new string.
#
# Every failure mode yields nil so the caller can treat them uniformly:
# no encoding given, an encoding name Ruby does not know, bytes that are
# invalid in that encoding, or bytes with no UTF-8 mapping.
#
# @param str [String] raw field bytes
# @param current_encoding [String, Encoding, nil] encoding the bytes are believed to be in
# @return [String, nil] UTF-8 copy of the text, or nil when conversion is not possible
def convert_to_utf8(str, current_encoding)
  return nil if current_encoding.nil?

  str.force_encoding(current_encoding)
  return nil unless str.valid_encoding?

  str.encode('utf-8')
rescue ArgumentError, EncodingError
  # ArgumentError: unknown encoding name passed to force_encoding.
  # EncodingError: undefined conversion / invalid byte sequence / no converter.
  nil
end
23
+
24
# Ask rchardet for its guess at the encoding of +col+ and return the value
# stored under the 'encoding' key of the detection result.
def detect_encoding(col)
  detection = CharDet.detect(col)
  detection['encoding']
end
27
+
28
# Remove a leading UTF-8 byte order mark (EF BB BF) from +col+ in place.
#
# Only a BOM at the very start of the string is removed; the same byte
# sequence later in the field is data and is left alone. The check is done on
# raw bytes, so it works whatever encoding +col+ is tagged with, and the
# string keeps that encoding.
#
# @param col [String, nil] first header field
# @return [String, nil] col when a BOM was removed, nil otherwise
def strip_bom!(col)
  bom = "\xEF\xBB\xBF".b
  return nil unless col && col.b.start_with?(bom)

  col.replace(col.byteslice(bom.bytesize..-1))
end
31
+
32
# Main script.
#
# Usage: csv-validator <csv_file> [id_column_name]
#
# Reads the CSV row by row, reports rows whose column count differs from the
# header, and reports every field that is not valid UTF-8 together with a
# conversion attempt from the detected encoding. When an id column name is
# given, each successful conversion is also written to utf8-correctsion.csv.
input_path = ARGV[0]
id_column_name = ARGV[1]

unless input_path
  $stderr.puts 'usage: csv-validator <csv_file> [id_column_name]'
  exit 1
end

csv = CSV.open(input_path, 'rb')
out = nil

begin
  headers = csv.shift
  unless headers
    $stderr.puts "#{input_path}: no header row found"
    exit 1
  end
  strip_bom!(headers[0]) if headers[0]

  id_column_num = nil
  if id_column_name
    # The file is opened in binary mode while the name comes from ARGV, so the
    # two strings may carry different encoding tags; compare raw bytes so a
    # non-ASCII header name can still match.
    id_column_num = headers.index { |header| header && header.b == id_column_name.b }
    unless id_column_num
      $stderr.puts("header #{id_column_name} not found in current set of headers")
      exit 1
    end

    # NOTE(review): "correctsion" looks like a typo, but the file name is kept
    # because other tooling may depend on it.
    out = CSV.open('utf8-correctsion.csv', 'wb')
    out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
  end

  # The header counts as row 1.
  csv_lineno = 1

  while (row = csv.shift)
    csv_lineno += 1

    unless row.size == headers.size
      $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
    end

    row.each_with_index do |col, idx|
      # Empty fields arrive as nil and have nothing to validate.
      next if col.nil? || utf8?(col)

      $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""

      # Detect once and reuse the result for both the conversion and the report.
      encoding = detect_encoding(col)
      col_utf8_encoded = encoding && convert_to_utf8(col, encoding)

      if col_utf8_encoded
        puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{encoding} \"#{col_utf8_encoded}\""
        # The corrections file only exists when an id column was requested.
        out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded] if out
      else
        $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
      end
    end
  end
ensure
  # Runs on normal completion, on exit, and on errors.
  csv.close
  out.close if out
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.0'
5
+ s.version = '0.3.5'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -3,6 +3,7 @@ require 'csv'
3
3
  # Collection of tools for working with CSV files.
4
4
  module CSVUtils
5
5
  autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVIterator, 'csv_utils/csv_iterator'
6
7
  autoload :CSVOptions, 'csv_utils/csv_options'
7
8
  autoload :CSVReport, 'csv_utils/csv_report'
8
9
  autoload :CSVRow, 'csv_utils/csv_row'
@@ -0,0 +1,47 @@
1
module CSVUtils
  # Enumerable view over the data rows of a CSV source. Each row is yielded as
  # a RowWrapper: a Hash mapping header => value that also records the row's
  # position in the source.
  class CSVIterator
    include Enumerable

    # Raw bytes of the UTF-8 byte order mark.
    UTF8_BOM = "\xEF\xBB\xBF".b.freeze

    # Hash of header => value for one row, tagged with its row number.
    class RowWrapper < Hash
      attr_accessor :lineno

      # Build a wrapper by pairing each header with the value at the same
      # position in +row+.
      #
      # @param headers [Array] column names
      # @param row [Array] field values
      # @param lineno [Integer] row number to record on the wrapper
      # @return [RowWrapper]
      def self.create(headers, row, lineno)
        row_wrapper = self[headers.zip(row)]
        row_wrapper.lineno = lineno
        row_wrapper
      end

      # Render the non-blank fields, one per line, numbered from 1 in display
      # order. Blank means nil or whitespace-only; nil occurs for empty CSV
      # fields and for rows shorter than the header.
      #
      # @return [String] newline-terminated listing
      def to_pretty_s
        reject { |_, v| v.to_s.strip.empty? }
          .each_with_index
          .map { |(k, v), idx| format(' %-3d %s: %s', idx + 1, k, v) }
          .join("\n") + "\n"
      end
    end

    # @param src_csv source handed to CSVUtils::CSVWrapper (opened with mode 'rb')
    # @param csv_options [Hash] options forwarded to CSVUtils::CSVWrapper
    def initialize(src_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
    end

    # Rewind the source and yield every data row as a RowWrapper.
    #
    # When +headers+ is nil the first row is consumed as the header row (with
    # a leading BOM removed from its first field) and counts as row 1. When
    # headers are supplied, every row is treated as data.
    #
    # @param headers [Array, nil] column names to use instead of the first row
    # @yieldparam row [RowWrapper]
    # @return [Enumerator] when no block is given
    # @return [nil] when a block is given
    def each(headers = nil)
      return enum_for(:each, headers) unless block_given?

      @src_csv.rewind

      lineno = 0
      unless headers
        headers = @src_csv.shift
        # An empty source has no header row and therefore no rows to yield.
        return unless headers

        strip_bom!(headers[0])
        lineno += 1
      end

      while (row = @src_csv.shift)
        lineno += 1
        yield RowWrapper.create(headers, row, lineno)
      end
    end

    private

    # Remove a leading UTF-8 BOM from +col+ in place. The check is made on raw
    # bytes so it works for any encoding tag, and only a BOM at the very start
    # is removed. Returns col when a BOM was stripped, nil otherwise.
    def strip_bom!(col)
      return nil unless col && col.b.start_with?(UTF8_BOM)

      col.replace(col.byteslice(UTF8_BOM.bytesize..-1))
    end
  end
end
@@ -35,6 +35,10 @@ class CSVUtils::CSVWrapper
35
35
  csv.shift
36
36
  end
37
37
 
38
# Rewind the wrapped CSV by delegating to csv.rewind, so that reading can
# start over (presumably from the first row — confirm against CSV#rewind).
+ def rewind
39
+ csv.rewind
40
+ end
41
+
38
42
  def close
39
43
  csv.close if close_when_done?
40
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-19 00:00:00.000000000 Z
11
+ date: 2020-07-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -28,8 +28,10 @@ description: Tools for debugging malformed CSV files
28
28
  email: dougyouch@gmail.com
29
29
  executables:
30
30
  - csv-change-eol
31
+ - csv-explorer
31
32
  - csv-find-error
32
33
  - csv-readline
34
+ - csv-validator
33
35
  extensions: []
34
36
  extra_rdoc_files: []
35
37
  files:
@@ -41,11 +43,14 @@ files:
41
43
  - LICENSE
42
44
  - README.md
43
45
  - bin/csv-change-eol
46
+ - bin/csv-explorer
44
47
  - bin/csv-find-error
45
48
  - bin/csv-readline
49
+ - bin/csv-validator
46
50
  - csv-utils.gemspec
47
51
  - lib/csv-utils.rb
48
52
  - lib/csv_utils/csv_extender.rb
53
+ - lib/csv_utils/csv_iterator.rb
49
54
  - lib/csv_utils/csv_options.rb
50
55
  - lib/csv_utils/csv_report.rb
51
56
  - lib/csv_utils/csv_row.rb