csv-utils 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +57 -0
- data/bin/csv-change-eol +54 -0
- data/bin/csv-find-error +1 -2
- data/bin/csv-readline +36 -4
- data/csv-utils.gemspec +4 -1
- data/lib/csv-utils.rb +8 -0
- data/lib/csv_utils/csv_options.rb +87 -0
- data/lib/csv_utils/csv_report.rb +40 -0
- data/lib/csv_utils/csv_row.rb +51 -0
- data/script/console +7 -0
- metadata +34 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ecb75f60c8e9b9db4cc3eb0e4ca3a0ac53aad67726ed995b7e8c341cd0dc76a3
|
4
|
+
data.tar.gz: 5138b5cc82eec0b7667c9e3435c2662bbfa1de51e469582372a158b943e57d7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a7d685b0db28805833596b32793fca968c6a5a1f57346223b487579622423d0151a122253c69806e377015fe0cf9cf02381bafbee37cb0ba54fa290a857c1cc
|
7
|
+
data.tar.gz: 7c572f9e7c74d626084612afa188bb15b036377bbac16ed5374d7fcff300e58fab8100a77729c2fa428e4294ac6b3b643feb00b2e87e7de78883c8548193de54
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
csv-utils
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.3
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.4.1)
|
5
|
+
diff-lcs (1.3)
|
6
|
+
docile (1.3.2)
|
7
|
+
inheritance-helper (0.1.5)
|
8
|
+
parallel (1.19.2)
|
9
|
+
parser (2.7.1.4)
|
10
|
+
ast (~> 2.4.1)
|
11
|
+
rainbow (3.0.0)
|
12
|
+
rake (13.0.1)
|
13
|
+
regexp_parser (1.7.1)
|
14
|
+
rexml (3.2.4)
|
15
|
+
rspec (3.9.0)
|
16
|
+
rspec-core (~> 3.9.0)
|
17
|
+
rspec-expectations (~> 3.9.0)
|
18
|
+
rspec-mocks (~> 3.9.0)
|
19
|
+
rspec-core (3.9.2)
|
20
|
+
rspec-support (~> 3.9.3)
|
21
|
+
rspec-expectations (3.9.2)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.9.0)
|
24
|
+
rspec-mocks (3.9.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-support (3.9.3)
|
28
|
+
rubocop (0.86.0)
|
29
|
+
parallel (~> 1.10)
|
30
|
+
parser (>= 2.7.0.1)
|
31
|
+
rainbow (>= 2.2.2, < 4.0)
|
32
|
+
regexp_parser (>= 1.7)
|
33
|
+
rexml
|
34
|
+
rubocop-ast (>= 0.0.3, < 1.0)
|
35
|
+
ruby-progressbar (~> 1.7)
|
36
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
37
|
+
rubocop-ast (0.0.3)
|
38
|
+
parser (>= 2.7.0.1)
|
39
|
+
ruby-progressbar (1.10.1)
|
40
|
+
simplecov (0.18.5)
|
41
|
+
docile (~> 1.1)
|
42
|
+
simplecov-html (~> 0.11)
|
43
|
+
simplecov-html (0.12.2)
|
44
|
+
unicode-display_width (1.7.0)
|
45
|
+
|
46
|
+
PLATFORMS
|
47
|
+
ruby
|
48
|
+
|
49
|
+
DEPENDENCIES
|
50
|
+
inheritance-helper
|
51
|
+
rake
|
52
|
+
rspec
|
53
|
+
rubocop
|
54
|
+
simplecov
|
55
|
+
|
56
|
+
BUNDLED WITH
|
57
|
+
1.17.3
|
data/bin/csv-change-eol
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
def bold_string(str)
|
6
|
+
"\033[1m#{str}\033[0m"
|
7
|
+
end
|
8
|
+
|
9
|
+
USAGE = "Usage: #{bold_string('csv-change-eol')} <csv_file> <end of line character sequence in hex>"
|
10
|
+
|
11
|
+
def exit_on_error(msg)
|
12
|
+
$stderr.print <<STR
|
13
|
+
Error: #{bold_string(msg)}
|
14
|
+
|
15
|
+
#{USAGE}
|
16
|
+
|
17
|
+
End of line example: '7C5E7C0A' is '|^|\\n'
|
18
|
+
- 0A is new line
|
19
|
+
- 0D is carriage return
|
20
|
+
|
21
|
+
Goto: #{bold_string('http://www.asciitable.com/')} for help with the character sequence
|
22
|
+
|
23
|
+
STR
|
24
|
+
exit 1
|
25
|
+
end
|
26
|
+
|
27
|
+
csv_file = ARGV.shift || exit_on_error('no csv file specified')
|
28
|
+
eol_sequence = ARGV.shift || exit_on_error('no EOL character sequence specified')
|
29
|
+
|
30
|
+
exit_on_error("file #{csv_file} not found") unless File.exist?(csv_file)
|
31
|
+
exit_on_error("not a HEX sequece (#{eol_sequence})") unless eol_sequence =~ /\A[0-9a-f]+\z/i
|
32
|
+
exit_on_error("incorrect number of characters in (#{eol_sequence}), should be even") unless eol_sequence.size.even?
|
33
|
+
|
34
|
+
eol_sequence = [eol_sequence].pack('H*')
|
35
|
+
|
36
|
+
|
37
|
+
escaped_csv_file =
|
38
|
+
if csv_file =~ /\.csv$/i
|
39
|
+
csv_file.sub(/(\.csv)$/i, '.escaped-eol\1')
|
40
|
+
else
|
41
|
+
csv_file + '.escaped-eol'
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
File.open(escaped_csv_file, 'wb') do |out|
|
46
|
+
CSV.foreach(csv_file) do |row|
|
47
|
+
line = row.to_csv
|
48
|
+
line.rstrip!
|
49
|
+
line.concat(eol_sequence)
|
50
|
+
out.write line
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
puts escaped_csv_file
|
data/bin/csv-find-error
CHANGED
@@ -7,8 +7,7 @@ begin
|
|
7
7
|
CSV.open(ARGV[0], 'rb').each { }
|
8
8
|
rescue CSV::MalformedCSVError => e
|
9
9
|
puts e.class.to_s + ': ' + e.message
|
10
|
-
if e.message =~ /
|
11
|
-
e.message =~ /Unclosed quoted field on line (\d+)/
|
10
|
+
if e.message =~ /line (\d+)/
|
12
11
|
lineno = $1.to_i
|
13
12
|
cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
|
14
13
|
puts "running #{cmd}"
|
data/bin/csv-readline
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
4
|
+
|
3
5
|
BYTE_ORDER_MARKS = {
|
4
6
|
"\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
|
5
7
|
"\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
|
@@ -25,11 +27,12 @@ def csv_parse_line(line)
|
|
25
27
|
last_comma_pos = -1
|
26
28
|
column = 1
|
27
29
|
|
28
|
-
while pos = line.index(/(["
|
30
|
+
while pos = line.index(/([",\n])/, pos + 1)
|
29
31
|
case line[pos]
|
30
32
|
when '"'
|
31
33
|
if opened_quote
|
32
34
|
if line[pos+1] == ',' ||
|
35
|
+
line[pos+1] == "\r" ||
|
33
36
|
line[pos+1] == "\n" ||
|
34
37
|
line[pos+1].nil?
|
35
38
|
opened_quote = false
|
@@ -47,9 +50,22 @@ def csv_parse_line(line)
|
|
47
50
|
# return columns
|
48
51
|
end
|
49
52
|
else
|
50
|
-
|
53
|
+
if (last_comma_pos == -1 && pos != 0) ||
|
54
|
+
(last_comma_pos != -1 && line[pos-1] != ',')
|
55
|
+
col_end = line.index(/,/, pos + 1)
|
56
|
+
col_end ||= line.size - 1
|
57
|
+
# slice out the column value
|
58
|
+
val = line[last_comma_pos + 1, col_end - last_comma_pos - 1]
|
59
|
+
columns << [val, :stray_quote]
|
60
|
+
opened_quote = false
|
61
|
+
last_comma_pos = col_end
|
62
|
+
pos = col_end
|
63
|
+
else
|
64
|
+
opened_quote = true
|
65
|
+
end
|
51
66
|
end
|
52
|
-
when ','
|
67
|
+
when ',',
|
68
|
+
"\n"
|
53
69
|
if ! opened_quote
|
54
70
|
column += 1
|
55
71
|
columns << [line[last_comma_pos + 1, pos - last_comma_pos - 1], :ok]
|
@@ -82,6 +98,22 @@ def parse_csv_row(file, lineno, number_of_lines)
|
|
82
98
|
csv_parse_line(str)
|
83
99
|
end
|
84
100
|
|
101
|
+
options = {
|
102
|
+
all_columns: false
|
103
|
+
}
|
104
|
+
OptionParser.new do |opts|
|
105
|
+
opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file> <line number> [<number of lines>]'
|
106
|
+
|
107
|
+
opts.on('-h', '--help', 'Prints this help') do
|
108
|
+
puts opts
|
109
|
+
exit
|
110
|
+
end
|
111
|
+
|
112
|
+
opts.on('-a', '--all', 'Display all columns') do
|
113
|
+
options[:all_columns] = true
|
114
|
+
end
|
115
|
+
end.parse!
|
116
|
+
|
85
117
|
file = File.open(ARGV[0], 'rb')
|
86
118
|
lineno = ARGV[1].to_i
|
87
119
|
number_of_lines = (ARGV[2] || 1).to_i
|
@@ -96,7 +128,7 @@ file.close
|
|
96
128
|
cnt = 0
|
97
129
|
data.each do |k, (v, status)|
|
98
130
|
cnt += 1
|
99
|
-
next if empty_column?(v)
|
131
|
+
next if !options[:all_columns] && empty_column?(v)
|
100
132
|
if status == :ok
|
101
133
|
puts sprintf(' %-3d %s: %s', cnt, k, v)
|
102
134
|
else
|
data/csv-utils.gemspec
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'csv-utils'
|
5
|
-
s.version = '0.1.3'
|
5
|
+
s.version = '0.2.0'
|
6
|
+
s.licenses = ['MIT']
|
6
7
|
s.summary = 'CSV Utils'
|
7
8
|
s.description = 'Tools for debugging malformed CSV files'
|
8
9
|
s.authors = ['Doug Youch']
|
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
|
|
11
12
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
12
13
|
s.bindir = 'bin'
|
13
14
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
15
|
+
|
16
|
+
s.add_runtime_dependency 'inheritance-helper'
|
14
17
|
end
|
data/lib/csv-utils.rb
ADDED
data/lib/csv_utils/csv_options.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Auto detect a csv files options
|
2
|
+
module CSVUtils
|
3
|
+
class CSVOptions
|
4
|
+
|
5
|
+
# this list is from https://en.wikipedia.org/wiki/Byte_order_mark
|
6
|
+
BYTE_ORDER_MARKS = {
|
7
|
+
"\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
|
8
|
+
"\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
|
9
|
+
"\xFF\xFE".force_encoding('ASCII-8BIT') => 'UTF-16',
|
10
|
+
"\x00\x00\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-32',
|
11
|
+
"\xFF\xFE\x00\x00".force_encoding('ASCII-8BIT') => 'UTF-32'
|
12
|
+
}
|
13
|
+
|
14
|
+
COL_SEPARATORS = [
|
15
|
+
"\x02",
|
16
|
+
"\t",
|
17
|
+
'|',
|
18
|
+
','
|
19
|
+
]
|
20
|
+
|
21
|
+
ROW_SEPARATORS = [
|
22
|
+
"\r\n",
|
23
|
+
"\n",
|
24
|
+
"\r"
|
25
|
+
]
|
26
|
+
|
27
|
+
attr_reader :columns,
|
28
|
+
:byte_order_mark,
|
29
|
+
:encoding,
|
30
|
+
:col_separator,
|
31
|
+
:row_separator
|
32
|
+
|
33
|
+
|
34
|
+
def initialize(io)
|
35
|
+
line =
|
36
|
+
if io.is_a?(String)
|
37
|
+
File.open(io, 'rb', &:readline)
|
38
|
+
else
|
39
|
+
io.readline
|
40
|
+
end
|
41
|
+
|
42
|
+
@col_separator = auto_detect_col_sep(line)
|
43
|
+
@row_separator = auto_detect_row_sep(line)
|
44
|
+
@byte_order_mark = get_byte_order_mark(line)
|
45
|
+
@encoding = get_character_encoding(@byte_order_mark)
|
46
|
+
@columns = get_number_of_columns(line) if @col_separator
|
47
|
+
end
|
48
|
+
|
49
|
+
def valid?
|
50
|
+
return false if @col_separator.nil? || @row_separator.nil?
|
51
|
+
|
52
|
+
true
|
53
|
+
end
|
54
|
+
|
55
|
+
def auto_detect_col_sep(line)
|
56
|
+
COL_SEPARATORS.detect { |sep| line.include?(sep) }
|
57
|
+
end
|
58
|
+
|
59
|
+
def auto_detect_row_sep(line)
|
60
|
+
ROW_SEPARATORS.detect { |sep| line.include?(sep) }
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_headers(line)
|
64
|
+
headers = line.split(col_separator)
|
65
|
+
headers[0] = strip_byte_order_marks(headers[0])
|
66
|
+
headers
|
67
|
+
end
|
68
|
+
|
69
|
+
def get_number_of_columns(line)
|
70
|
+
get_headers(line).size
|
71
|
+
end
|
72
|
+
|
73
|
+
def get_byte_order_mark(line)
|
74
|
+
BYTE_ORDER_MARKS.keys.detect do |bom|
|
75
|
+
line =~ /\A#{bom}/
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def get_character_encoding(bom)
|
80
|
+
BYTE_ORDER_MARKS[bom] || 'UTF-8'
|
81
|
+
end
|
82
|
+
|
83
|
+
def strip_byte_order_marks(header)
|
84
|
+
@byte_order_marks ? header.sub(@byte_order_marks, '') : header
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/csv_utils/csv_report.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Builds a csv file from csv rows
|
2
|
+
module CSVUtils
|
3
|
+
class CSVReport
|
4
|
+
attr_reader :csv,
|
5
|
+
:must_close
|
6
|
+
|
7
|
+
def initialize(csv, csv_options = {}, &block)
|
8
|
+
@csv =
|
9
|
+
if csv.is_a?(String)
|
10
|
+
@must_close = true
|
11
|
+
mode = csv_options.delete(:mode) || 'wb'
|
12
|
+
CSV.open(csv, mode, csv_options)
|
13
|
+
else
|
14
|
+
@must_close = false
|
15
|
+
csv
|
16
|
+
end
|
17
|
+
|
18
|
+
generate(&block) if block
|
19
|
+
end
|
20
|
+
|
21
|
+
def generate
|
22
|
+
yield self
|
23
|
+
@csv.close if @must_close
|
24
|
+
end
|
25
|
+
|
26
|
+
def append(csv_row)
|
27
|
+
@csv <<
|
28
|
+
if csv_row.is_a?(Array)
|
29
|
+
csv_row
|
30
|
+
else
|
31
|
+
csv_row.to_a
|
32
|
+
end
|
33
|
+
end
|
34
|
+
alias << append
|
35
|
+
|
36
|
+
def add_headers(csv_row)
|
37
|
+
append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/csv_utils/csv_row.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'inheritance-helper'
|
2
|
+
|
3
|
+
module CSVUtils
|
4
|
+
module CSVRow
|
5
|
+
def self.included(base)
|
6
|
+
base.extend InheritanceHelper::Methods
|
7
|
+
base.extend ClassMethods
|
8
|
+
end
|
9
|
+
|
10
|
+
module ClassMethods
|
11
|
+
def csv_columns
|
12
|
+
{}
|
13
|
+
end
|
14
|
+
|
15
|
+
def csv_column(header, options = {}, &block)
|
16
|
+
options[:header] ||= header.to_s
|
17
|
+
|
18
|
+
if block
|
19
|
+
options[:proc] = block
|
20
|
+
elsif options[:proc].nil?
|
21
|
+
options[:method] ||= header
|
22
|
+
end
|
23
|
+
|
24
|
+
add_value_to_class_method(:csv_columns, header => options)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def csv_headers
|
29
|
+
self.class.csv_columns.values.map { |column_options| csv_column_header(column_options) }
|
30
|
+
end
|
31
|
+
|
32
|
+
def csv_row
|
33
|
+
self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
|
34
|
+
end
|
35
|
+
alias_method :to_a, :csv_row
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def csv_column_header(column_options)
|
40
|
+
column_options[:header]
|
41
|
+
end
|
42
|
+
|
43
|
+
def csv_column_value(column_options)
|
44
|
+
if column_options[:proc]
|
45
|
+
instance_eval(&column_options[:proc])
|
46
|
+
else
|
47
|
+
send(column_options[:method])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/script/console
ADDED
metadata
CHANGED
@@ -1,33 +1,59 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
11
|
+
date: 2020-06-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: inheritance-helper
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
description: Tools for debugging malformed CSV files
|
14
28
|
email: dougyouch@gmail.com
|
15
29
|
executables:
|
30
|
+
- csv-change-eol
|
16
31
|
- csv-find-error
|
17
32
|
- csv-readline
|
18
33
|
extensions: []
|
19
34
|
extra_rdoc_files: []
|
20
35
|
files:
|
21
36
|
- ".gitignore"
|
37
|
+
- ".ruby-gemset"
|
38
|
+
- ".ruby-version"
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
22
41
|
- LICENSE
|
23
42
|
- README.md
|
43
|
+
- bin/csv-change-eol
|
24
44
|
- bin/csv-find-error
|
25
45
|
- bin/csv-readline
|
26
46
|
- csv-utils.gemspec
|
47
|
+
- lib/csv-utils.rb
|
48
|
+
- lib/csv_utils/csv_options.rb
|
49
|
+
- lib/csv_utils/csv_report.rb
|
50
|
+
- lib/csv_utils/csv_row.rb
|
51
|
+
- script/console
|
27
52
|
homepage: https://github.com/dougyouch/csv-utils
|
28
|
-
licenses:
|
53
|
+
licenses:
|
54
|
+
- MIT
|
29
55
|
metadata: {}
|
30
|
-
post_install_message:
|
56
|
+
post_install_message:
|
31
57
|
rdoc_options: []
|
32
58
|
require_paths:
|
33
59
|
- lib
|
@@ -42,9 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
68
|
- !ruby/object:Gem::Version
|
43
69
|
version: '0'
|
44
70
|
requirements: []
|
45
|
-
|
46
|
-
|
47
|
-
signing_key:
|
71
|
+
rubygems_version: 3.0.8
|
72
|
+
signing_key:
|
48
73
|
specification_version: 4
|
49
74
|
summary: CSV Utils
|
50
75
|
test_files: []
|