csv-utils 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/csv-validator +47 -0
- data/csv-utils.gemspec +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '019dcd269f036bc21e93019e567e8a0223d8436e87c56519d74af02383640bdf'
|
4
|
+
data.tar.gz: 34b4e8035a533e897c395943e892de0bae16fbdc3847a4990cc0281225d21bd4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e770276baa097fa30551266882910818f331890c6e9bfd7fa92ab01654826a14ad3b67f151f2ce858c816d6d628170174fcc872038636998c15c818dc129130a
|
7
|
+
data.tar.gz: a1689e7404f5d9b70f092b7cda83c8f200df0af1725d27ecc222d798bafcbb1ea1612cbff74dbef493fe427a12ff8330f7828ed608becb4a3d3cd7267179319e
|
data/bin/csv-validator
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'rchardet'
|
5
|
+
|
6
|
+
# Reports whether the bytes of +str+ form a valid UTF-8 sequence.
#
# The check runs on a copy, so the caller's string keeps its original
# encoding tag and frozen strings are accepted.
#
# @param str [String, nil] raw field value; CSV yields nil for an empty,
#   unquoted field
# @return [Boolean] true when +str+ is nil or its bytes are valid UTF-8
def utf8?(str)
  # An absent value has no bytes, so nothing in it can be invalid.
  return true if str.nil?

  str.dup.force_encoding(Encoding::UTF_8).valid_encoding?
end
|
11
|
+
|
12
|
+
# Re-interprets the bytes of +str+ as +current_encoding+ and transcodes them
# to UTF-8.
#
# Every failure mode is reported as nil instead of an exception: a missing
# or unknown encoding name, bytes that are not valid in that encoding, and
# characters that have no UTF-8 mapping.
#
# @param str [String, nil] raw field bytes
# @param current_encoding [String, Encoding, nil] encoding the bytes are
#   believed to be in
# @return [String, nil] a new UTF-8 string, or nil when conversion is not
#   possible
def convert_to_utf8(str, current_encoding)
  return nil if str.nil? || current_encoding.nil?

  # Encoding.find raises ArgumentError for names Ruby does not know.
  source_encoding = Encoding.find(current_encoding)

  # Work on a copy so the caller's string is not retagged.
  candidate = str.dup.force_encoding(source_encoding)
  return nil unless candidate.valid_encoding?

  candidate.encode('utf-8')
rescue ArgumentError, EncodingError
  # Unknown encoding name, or bytes that cannot be transcoded to UTF-8.
  nil
end
|
18
|
+
|
19
|
+
# Asks CharDet for its guess at the character encoding of +col+.
#
# @param col [String] raw field bytes
# @return the value stored under the 'encoding' key of CharDet's detection
#   result
def detect_encoding(col)
  detection = CharDet.detect(col)
  detection['encoding']
end
|
22
|
+
|
23
|
+
# Validates the CSV file named by the first command-line argument.
#
# The first row is taken as the header row. Each later row is reported on
# stderr when its column count differs from the header's, and each field that
# is not valid UTF-8 is reported on stderr. When such a field can be
# converted, the converted value is printed on stdout.
path = ARGV[0]
abort 'usage: csv-validator <file.csv>' unless path

# The block form closes the file even if parsing raises part-way through.
CSV.open(path, 'rb') do |csv|
  headers = csv.shift
  # An empty file has no header row, so there is nothing to validate against.
  next unless headers

  csv_lineno = 1

  while (row = csv.shift)
    csv_lineno += 1

    unless row.size == headers.size
      $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
    end

    row.each_with_index do |col, idx|
      # CSV yields nil for empty unquoted fields; there are no bytes to check.
      next if col.nil? || utf8?(col)

      $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): none UTF-8 characters found in \"#{col}\""

      # Detect once and reuse the result for both the conversion and the report.
      detected_encoding = detect_encoding(col)
      col_utf8_encoded = detected_encoding && convert_to_utf8(col, detected_encoding)

      if col_utf8_encoded
        puts "row(#{csv_lineno}),col(#{idx + 1}): converted to UTF-8 from #{detected_encoding} \"#{col_utf8_encoded}\""
      else
        $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}): unknown character encoding"
      end
    end
  end
end
|
data/csv-utils.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
@@ -30,6 +30,7 @@ executables:
|
|
30
30
|
- csv-change-eol
|
31
31
|
- csv-find-error
|
32
32
|
- csv-readline
|
33
|
+
- csv-validator
|
33
34
|
extensions: []
|
34
35
|
extra_rdoc_files: []
|
35
36
|
files:
|
@@ -43,6 +44,7 @@ files:
|
|
43
44
|
- bin/csv-change-eol
|
44
45
|
- bin/csv-find-error
|
45
46
|
- bin/csv-readline
|
47
|
+
- bin/csv-validator
|
46
48
|
- csv-utils.gemspec
|
47
49
|
- lib/csv-utils.rb
|
48
50
|
- lib/csv_utils/csv_extender.rb
|