csv-utils 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/csv-validator +47 -0
  3. data/csv-utils.gemspec +1 -1
  4. metadata +3 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
4
- data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
3
+ metadata.gz: '019dcd269f036bc21e93019e567e8a0223d8436e87c56519d74af02383640bdf'
4
+ data.tar.gz: 34b4e8035a533e897c395943e892de0bae16fbdc3847a4990cc0281225d21bd4
5
5
  SHA512:
6
- metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
7
- data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
6
+ metadata.gz: e770276baa097fa30551266882910818f331890c6e9bfd7fa92ab01654826a14ad3b67f151f2ce858c816d6d628170174fcc872038636998c15c818dc129130a
7
+ data.tar.gz: a1689e7404f5d9b70f092b7cda83c8f200df0af1725d27ecc222d798bafcbb1ea1612cbff74dbef493fe427a12ff8330f7828ed608becb4a3d3cd7267179319e
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ require 'rchardet'
5
+
6
+ def utf8?(str)
7
+ str
8
+ .force_encoding('utf-8')
9
+ .valid_encoding?
10
+ end
11
+
12
+ def convert_to_utf8(str, current_encoding)
13
+ str.force_encoding(current_encoding)
14
+ return nil unless str.valid_encoding?
15
+
16
+ str.encode('utf-8')
17
+ end
18
+
19
+ def detect_encoding(col)
20
+ CharDet.detect(col)['encoding']
21
+ end
22
+
23
+ csv = CSV.open(ARGV[0], 'rb')
24
+
25
+ headers = csv.shift
26
+ csv_lineno = 1
27
+
28
+ while (row = csv.shift)
29
+ csv_lineno += 1
30
+
31
+ unless row.size == headers.size
32
+ $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
33
+ end
34
+
35
+ row.each_with_index do |col, idx|
36
+ next if utf8?(col)
37
+
38
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\""
39
+ if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
40
+ puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
41
+ else
42
+ $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding"
43
+ end
44
+ end
45
+ end
46
+
47
+ csv.close
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.0'
5
+ s.version = '0.3.1'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -30,6 +30,7 @@ executables:
30
30
  - csv-change-eol
31
31
  - csv-find-error
32
32
  - csv-readline
33
+ - csv-validator
33
34
  extensions: []
34
35
  extra_rdoc_files: []
35
36
  files:
@@ -43,6 +44,7 @@ files:
43
44
  - bin/csv-change-eol
44
45
  - bin/csv-find-error
45
46
  - bin/csv-readline
47
+ - bin/csv-validator
46
48
  - csv-utils.gemspec
47
49
  - lib/csv-utils.rb
48
50
  - lib/csv_utils/csv_extender.rb