csv-utils 0.3.3 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/csv-explorer +12 -0
- data/bin/csv-find-error +3 -1
- data/bin/csv-validator +25 -8
- data/csv-utils.gemspec +1 -1
- data/lib/csv-utils.rb +1 -0
- data/lib/csv_utils/csv_iterator.rb +51 -0
- data/lib/csv_utils/csv_report.rb +9 -4
- data/lib/csv_utils/csv_wrapper.rb +4 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0847ad28aee7ef73e5bfa9cbd0753e42fed5097ae68bfc2083f6ed2a6d08d66
|
4
|
+
data.tar.gz: '080a53543deadaa49e06616aa9f563818d3a5bbe13b76366f1f795f075be7e41'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8504ec3c569e92e3c7adc141af4077e04085b8befdad8a15c76f05405045dad52821aff1fc26d792eba4d533ea588794d9ab88f96f5c37e2cf7146624090eded
|
7
|
+
data.tar.gz: f4dfa17b0ec81d5923e3ff32978f584a64d567aa8c770dd7b252770f1535ae4eba578dcec105f201ae006929114c90e1615196c6d28ac6fd36bd9a89fb978617
|
data/bin/csv-explorer
ADDED
data/bin/csv-find-error
CHANGED
@@ -3,10 +3,12 @@
|
|
3
3
|
require 'csv'
|
4
4
|
require 'shellwords'
|
5
5
|
|
6
|
+
prev_row = nil
|
6
7
|
begin
|
7
|
-
CSV.open(ARGV[0], 'rb').each { }
|
8
|
+
CSV.open(ARGV[0], 'rb').each { |row| prev_row = row }
|
8
9
|
rescue CSV::MalformedCSVError => e
|
9
10
|
puts e.class.to_s + ': ' + e.message
|
11
|
+
puts "previous row was #{prev_row}"
|
10
12
|
if e.message =~ /line (\d+)/
|
11
13
|
lineno = $1.to_i
|
12
14
|
cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
|
data/bin/csv-validator
CHANGED
@@ -25,11 +25,32 @@ def detect_encoding(col)
|
|
25
25
|
CharDet.detect(col)['encoding']
|
26
26
|
end
|
27
27
|
|
28
|
+
def strip_bom!(col)
|
29
|
+
col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
|
30
|
+
end
|
31
|
+
|
28
32
|
csv = CSV.open(ARGV[0], 'rb')
|
29
|
-
|
33
|
+
id_column_name = ARGV[1]
|
30
34
|
|
31
35
|
headers = csv.shift
|
32
|
-
|
36
|
+
strip_bom!(headers[0])
|
37
|
+
|
38
|
+
id_column_num = nil
|
39
|
+
if id_column_name
|
40
|
+
unless headers.include?(id_column_name)
|
41
|
+
$stderr.puts("header #{id_column_name} not found in current set of headers")
|
42
|
+
exit 1
|
43
|
+
end
|
44
|
+
|
45
|
+
id_column_num = headers.index(id_column_name)
|
46
|
+
end
|
47
|
+
|
48
|
+
out = nil
|
49
|
+
if id_column_num
|
50
|
+
out = CSV.open('utf8-correctsion.csv', 'wb')
|
51
|
+
out << [id_column_name, 'Row', 'Col', 'Header', 'Value']
|
52
|
+
end
|
53
|
+
|
33
54
|
csv_lineno = 1
|
34
55
|
|
35
56
|
while (row = csv.shift)
|
@@ -39,21 +60,17 @@ while (row = csv.shift)
|
|
39
60
|
$stderr.puts "row(#{csv_lineno}): invalid number of columns, expected #{headers.size} got #{row.size}"
|
40
61
|
end
|
41
62
|
|
42
|
-
converted = false
|
43
63
|
row.each_with_index do |col, idx|
|
44
|
-
next if utf8?(col)
|
64
|
+
next if col.nil? || utf8?(col)
|
45
65
|
|
46
66
|
$stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: none UTF-8 characters found in \"#{col}\""
|
47
67
|
if (col_utf8_encoded = convert_to_utf8(col, detect_encoding(col)))
|
48
|
-
converted = true
|
49
68
|
puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: converted to UTF-8 from #{detect_encoding(col)} \"#{col_utf8_encoded}\""
|
50
|
-
row[idx]
|
69
|
+
out << [row[id_column_num], csv_lineno, (idx + 1), headers[idx], col_utf8_encoded]
|
51
70
|
else
|
52
71
|
$stderr.puts "row(#{csv_lineno}),col(#{idx + 1}) #{headers[idx]}: unknown character encoding"
|
53
72
|
end
|
54
73
|
end
|
55
|
-
|
56
|
-
out << row if out && converted
|
57
74
|
end
|
58
75
|
|
59
76
|
csv.close
|
data/csv-utils.gemspec
CHANGED
data/lib/csv-utils.rb
CHANGED
@@ -3,6 +3,7 @@ require 'csv'
|
|
3
3
|
# Collection of tools for working with CSV files.
|
4
4
|
module CSVUtils
|
5
5
|
autoload :CSVExtender, 'csv_utils/csv_extender'
|
6
|
+
autoload :CSVIterator, 'csv_utils/csv_iterator'
|
6
7
|
autoload :CSVOptions, 'csv_utils/csv_options'
|
7
8
|
autoload :CSVReport, 'csv_utils/csv_report'
|
8
9
|
autoload :CSVRow, 'csv_utils/csv_row'
|
data/lib/csv_utils/csv_iterator.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# Search a CSV given a series of steps
|
2
|
+
class CSVUtils::CSVIterator
|
3
|
+
include Enumerable
|
4
|
+
|
5
|
+
attr_reader :prev_row
|
6
|
+
|
7
|
+
class RowWrapper < Hash
|
8
|
+
attr_accessor :lineno
|
9
|
+
|
10
|
+
def self.create(headers, row, lineno)
|
11
|
+
row_wrapper = RowWrapper[headers.zip(row)]
|
12
|
+
row_wrapper.lineno = lineno
|
13
|
+
row_wrapper
|
14
|
+
end
|
15
|
+
|
16
|
+
def to_pretty_s
|
17
|
+
reject { |_, v| v.strip.empty? }
|
18
|
+
.each_with_index
|
19
|
+
.map { |(k, v), idx| sprintf(' %-3d %s: %s', idx+1, k, v) }
|
20
|
+
.join("\n") + "\n"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def initialize(src_csv, csv_options = {})
|
25
|
+
@src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
|
26
|
+
end
|
27
|
+
|
28
|
+
def each(headers = nil)
|
29
|
+
@src_csv.rewind
|
30
|
+
|
31
|
+
lineno = 0
|
32
|
+
unless headers
|
33
|
+
headers = @src_csv.shift
|
34
|
+
strip_bom!(headers[0])
|
35
|
+
lineno += 1
|
36
|
+
end
|
37
|
+
|
38
|
+
@prev_row = nil
|
39
|
+
while (row = @src_csv.shift)
|
40
|
+
lineno += 1
|
41
|
+
yield RowWrapper.create(headers, row, lineno)
|
42
|
+
@prev_row = row
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
def strip_bom!(col)
|
49
|
+
col.sub!("\xEF\xBB\xBF".force_encoding('ASCII-8BIT'), '')
|
50
|
+
end
|
51
|
+
end
|
data/lib/csv_utils/csv_report.rb
CHANGED
@@ -15,13 +15,14 @@ module CSVUtils
|
|
15
15
|
csv
|
16
16
|
end
|
17
17
|
|
18
|
-
|
18
|
+
add_headers(headers) if headers
|
19
|
+
|
20
|
+
generate(&block) if block
|
19
21
|
end
|
20
22
|
|
21
|
-
def generate
|
22
|
-
add_headers(headers) if headers
|
23
|
+
def generate
|
23
24
|
yield self
|
24
|
-
|
25
|
+
close if @must_close
|
25
26
|
end
|
26
27
|
|
27
28
|
def append(csv_row)
|
@@ -37,5 +38,9 @@ module CSVUtils
|
|
37
38
|
def add_headers(csv_row)
|
38
39
|
append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
|
39
40
|
end
|
41
|
+
|
42
|
+
def close
|
43
|
+
@csv.close
|
44
|
+
end
|
40
45
|
end
|
41
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inheritance-helper
|
@@ -28,6 +28,7 @@ description: Tools for debugging malformed CSV files
|
|
28
28
|
email: dougyouch@gmail.com
|
29
29
|
executables:
|
30
30
|
- csv-change-eol
|
31
|
+
- csv-explorer
|
31
32
|
- csv-find-error
|
32
33
|
- csv-readline
|
33
34
|
- csv-validator
|
@@ -42,12 +43,14 @@ files:
|
|
42
43
|
- LICENSE
|
43
44
|
- README.md
|
44
45
|
- bin/csv-change-eol
|
46
|
+
- bin/csv-explorer
|
45
47
|
- bin/csv-find-error
|
46
48
|
- bin/csv-readline
|
47
49
|
- bin/csv-validator
|
48
50
|
- csv-utils.gemspec
|
49
51
|
- lib/csv-utils.rb
|
50
52
|
- lib/csv_utils/csv_extender.rb
|
53
|
+
- lib/csv_utils/csv_iterator.rb
|
51
54
|
- lib/csv_utils/csv_options.rb
|
52
55
|
- lib/csv_utils/csv_report.rb
|
53
56
|
- lib/csv_utils/csv_row.rb
|