csv-utils 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/csv-validator +24 -7
  3. data/csv-utils.gemspec +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a28b89b25aa7a6a90137b799c580aa9060d783f481000fd173281bc7e2367baf
4
- data.tar.gz: 7317ece2b8970a816b5e4b5b84623a2c966c3a2f7afa7c2dd30de831f7e3ac64
3
+ metadata.gz: 8e649e8e220856a0676e01ef58811ab9df3822a1757eae4e776a45256063c510
4
+ data.tar.gz: fad8bcac595659bc5d91b4ebc19ee8f18ebbdf910568e75ff812768e1c23515f
5
5
  SHA512:
6
- metadata.gz: e057cc9795559c630e4b939c67d00547a25e0cf83ed75b15713a6c381677a718dd2ac06f975486e5cdf45a1cbfd962caf2aa13105e57a544aa4b6fd177926445
7
- data.tar.gz: ee7552522db9fb683b8cbd73a94706a0a41e92135aed0c2ebec6c70754f1bcad210234b8e5b18da87322bbc6737b6f323c515a1920f09fda3230a5cdef2393d1
6
+ metadata.gz: 4e4a26c3494c466a0099f72db534b8bd73910444c6eadf1b1faa78213d225b6752e9e8b60818bbeabe57f4542bd235da1969a9b6a3bad968328622531f89f077
7
+ data.tar.gz: d35d7a46f4f58a0fe87e10c6bcfec3ededc840d5d3c7ce0f1e5162e22f6f9ddb1516ed23a59b93877529ae326d597dfbde5089d9676393d488e476f6a9ad0924
data/bin/csv-validator CHANGED
@@ -25,11 +25,32 @@ def detect_encoding(col)
25
25
  CharDet.detect(col)['encoding']
26
26
  end
27
27
 
28
+ def strip_bom!(col)
29
+ col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
30
+ end
31
+
28
32
  csv = CSV.open(ARGV[0], 'rb')
29
- out = CSV.open(ARGV[1], 'wb') if ARGV[1]
33
+ id_column_name = ARGV[1]
30
34
 
31
35
  headers = csv.shift
32
- out << headers if out
36
+ strip_bom!(headers[0])
37
+
38
+ id_column_num = nil
39
+ if id_column_name
40
+ unless headers.include?(id_column_name)
41
+ $stderr.puts("header #{id_column_name} not found in current set of headers")
42
+ exit 1
43
+ end
44
+
45
+ id_column_num = headers.index(id_column_name)
46
+ end
47
+
48
+ out = nil
49
+ if id_column_num
50
+ out = CSV.open('utf8-correctsion.csv', 'wb')
51
+ out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
52
+ end
53
+
33
54
  csv_lineno = 1
34
55
 
35
56
  while (row = csv.shift)
@@ -39,21 +60,17 @@ while (row = csv.shift)
39
60
  $stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
40
61
  end
41
62
 
42
- converted = false
43
63
  row.each_with_index do |col, idx|
44
64
  next if utf8?(col)
45
65
 
46
66
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
47
67
  if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
48
- converted = true
49
68
  puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
50
- row[idx] = col_utf8_encoded
69
+ out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
51
70
  else
52
71
  $stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
53
72
  end
54
73
  end
55
-
56
- out << row if out && converted
57
74
  end
58
75
 
59
76
  csv.close
data/csv-utils.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.4'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch