csv-utils 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/csv-validator +47 -0
  3. data/csv-utils.gemspec +1 -1
  4. metadata +3 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
4
- data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
3
+ metadata.gz: '019dcd269f036bc21e93019e567e8a0223d8436e87c56519d74af02383640bdf'
4
+ data.tar.gz: 34b4e8035a533e897c395943e892de0bae16fbdc3847a4990cc0281225d21bd4
5
5
  SHA512:
6
- metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
7
- data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
6
+ metadata.gz: e770276baa097fa30551266882910818f331890c6e9bfd7fa92ab01654826a14ad3b67f151f2ce858c816d6d628170174fcc872038636998c15c818dc129130a
7
+ data.tar.gz: a1689e7404f5d9b70f092b7cda83c8f200df0af1725d27ecc222d798bafcbb1ea1612cbff74dbef493fe427a12ff8330f7828ed608becb4a3d3cd7267179319e
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ require 'rchardet'
5
+
6
# Checks whether +str+ holds a valid UTF-8 byte sequence.
#
# An unfrozen string is re-tagged as UTF-8 in place (its bytes are left
# untouched), so the caller's string carries the UTF-8 label afterwards.
# A frozen string is checked through a copy instead of raising FrozenError.
#
# @param str [String, nil] raw field value; CSV yields +nil+ for an empty
#   unquoted field
# @return [Boolean] +true+ for +nil+ or valid UTF-8, +false+ otherwise
def utf8?(str)
  # A missing field contains no bytes, hence nothing that could be invalid.
  return true if str.nil?

  candidate = str.frozen? ? str.dup : str
  candidate.force_encoding('utf-8').valid_encoding?
end
11
+
12
# Re-interprets the bytes of +str+ as +current_encoding+ and transcodes them
# to UTF-8.
#
# An unfrozen +str+ is re-tagged with +current_encoding+ in place; a frozen
# one is converted through a copy.
#
# @param str [String, nil] raw bytes to convert
# @param current_encoding [String, Encoding, nil] encoding the bytes are
#   believed to be in (for example the name reported by a charset detector)
# @return [String, nil] the UTF-8 encoded string, or +nil+ when the input or
#   the encoding is missing, the encoding name is not known to Ruby, the bytes
#   are not valid in that encoding, or they cannot be transcoded to UTF-8
def convert_to_utf8(str, current_encoding)
  return nil if str.nil? || current_encoding.nil?

  source = str.frozen? ? str.dup : str
  source.force_encoding(current_encoding)
  return nil unless source.valid_encoding?

  source.encode('utf-8')
rescue ArgumentError, EncodingError
  # ArgumentError: encoding name unknown to Ruby.
  # EncodingError: no converter, or a byte without a UTF-8 mapping.
  # Callers treat nil as "could not convert", so report failure that way.
  nil
end
18
+
19
# Asks CharDet for the encoding of a single field.
#
# @param col [String, nil] raw field value
# @return [String, nil] the value stored under the 'encoding' key of the
#   detection result, or +nil+ when +col+ is +nil+
def detect_encoding(col)
  # NOTE(review): presumably the detector cannot handle nil — confirm against
  # rchardet; a missing field has no encoding to detect either way.
  return nil if col.nil?

  CharDet.detect(col)['encoding']
end
22
+
23
# Reports one field that is not valid UTF-8 and tries to convert it.
#
# The complaint goes to stderr. A successful conversion is printed on stdout;
# a failed one is reported on stderr.
#
# @param col [String] the offending field
# @param location [String] "row(N),col(M)" prefix used in every message
def report_non_utf8_column(col, location)
  $stderr.puts "#{location}: none UTF-8 characters found in \"#{col}\""

  # Detect once and reuse the answer for both the conversion and the message.
  encoding = detect_encoding(col)
  converted = encoding && convert_to_utf8(col, encoding)

  if converted
    puts "#{location}: converted to UTF-8 from #{encoding} \"#{converted}\""
  else
    $stderr.puts "#{location}: unknown character encoding"
  end
end

# Entry point: validates the CSV file named by the first command-line
# argument. The first row is taken as the header; every later row is checked
# for a matching column count and for fields that are not valid UTF-8.
abort 'usage: csv-validator FILE' if ARGV.empty?

# Block form so the file handle is closed even when parsing raises.
CSV.open(ARGV[0], 'rb') do |csv|
  headers = csv.shift
  # An empty file has no header row, so there is nothing to validate against.
  next if headers.nil?

  # Counts parsed rows (the header is row 1), not physical lines.
  csv_lineno = 1

  while (row = csv.shift)
    csv_lineno += 1

    unless row.size == headers.size
      $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
    end

    row.each_with_index do |col, idx|
      # Empty unquoted fields are parsed as nil and carry no bytes to check.
      next if col.nil? || utf8?(col)

      report_non_utf8_column(col, "row(#{csv_lineno}),col(#{idx + 1})")
    end
  end
end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.0'
5
+ s.version = '0.3.1'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -30,6 +30,7 @@ executables:
30
30
  - csv-change-eol
31
31
  - csv-find-error
32
32
  - csv-readline
33
+ - csv-validator
33
34
  extensions: []
34
35
  extra_rdoc_files: []
35
36
  files:
@@ -43,6 +44,7 @@ files:
43
44
  - bin/csv-change-eol
44
45
  - bin/csv-find-error
45
46
  - bin/csv-readline
47
+ - bin/csv-validator
46
48
  - csv-utils.gemspec
47
49
  - lib/csv-utils.rb
48
50
  - lib/csv_utils/csv_extender.rb