csv-utils 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/csv-validator +24 -7
  3. data/csv-utils.gemspec +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a28b89b25aa7a6a90137b799c580aa9060d783f481000fd173281bc7e2367baf
4
- data.tar.gz: 7317ece2b8970a816b5e4b5b84623a2c966c3a2f7afa7c2dd30de831f7e3ac64
3
+ metadata.gz: 8e649e8e220856a0676e01ef58811ab9df3822a1757eae4e776a45256063c510
4
+ data.tar.gz: fad8bcac595659bc5d91b4ebc19ee8f18ebbdf910568e75ff812768e1c23515f
5
5
  SHA512:
6
- metadata.gz: e057cc9795559c630e4b939c67d00547a25e0cf83ed75b15713a6c381677a718dd2ac06f975486e5cdf45a1cbfd962caf2aa13105e57a544aa4b6fd177926445
7
- data.tar.gz: ee7552522db9fb683b8cbd73a94706a0a41e92135aed0c2ebec6c70754f1bcad210234b8e5b18da87322bbc6737b6f323c515a1920f09fda3230a5cdef2393d1
6
+ metadata.gz: 4e4a26c3494c466a0099f72db534b8bd73910444c6eadf1b1faa78213d225b6752e9e8b60818bbeabe57f4542bd235da1969a9b6a3bad968328622531f89f077
7
+ data.tar.gz: d35d7a46f4f58a0fe87e10c6bcfec3ededc840d5d3c7ce0f1e5162e22f6f9ddb1516ed23a59b93877529ae326d597dfbde5089d9676393d488e476f6a9ad0924
data/bin/csv-validator CHANGED
@@ -25,11 +25,32 @@ def detect_encoding(col)
25
25
  CharDet.detect(col)['encoding']
26
26
  end
27
27
 
28
+ def strip_bom!(col)
29
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
30
+ end
31
+
28
32
  csv = CSV.open(ARGV[0], 'rb')
29
- out = CSV.open(ARGV[1], 'wb') if ARGV[1]
33
+ id_column_name = ARGV[1]
30
34
 
31
35
  headers = csv.shift
32
- out << headers if out
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
33
54
  csv_lineno = 1
34
55
 
35
56
  while (row = csv.shift)
@@ -39,21 +60,17 @@ while (row = csv.shift)
39
60
  $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
40
61
  end
41
62
 
42
- converted = false
43
63
  row.each_with_index do |col, idx|
44
64
  next if utf8?(col)
45
65
 
46
66
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
47
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
48
- converted = true
49
68
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
50
- row[idx] = col_utf8_encoded
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
51
70
  else
52
71
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
53
72
  end
54
73
  end
55
-
56
- out << row if out && converted
57
74
  end
58
75
 
59
76
  csv.close
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.4'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch