csv-utils 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +57 -0
- data/bin/csv-change-eol +54 -0
- data/bin/csv-find-error +1 -2
- data/bin/csv-readline +36 -4
- data/csv-utils.gemspec +4 -1
- data/lib/csv-utils.rb +8 -0
- data/lib/csv_utils/csv_options.rb +87 -0
- data/lib/csv_utils/csv_report.rb +40 -0
- data/lib/csv_utils/csv_row.rb +51 -0
- data/script/console +7 -0
- metadata +34 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ecb75f60c8e9b9db4cc3eb0e4ca3a0ac53aad67726ed995b7e8c341cd0dc76a3
|
4
|
+
data.tar.gz: 5138b5cc82eec0b7667c9e3435c2662bbfa1de51e469582372a158b943e57d7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a7d685b0db28805833596b32793fca968c6a5a1f57346223b487579622423d0151a122253c69806e377015fe0cf9cf02381bafbee37cb0ba54fa290a857c1cc
|
7
|
+
data.tar.gz: 7c572f9e7c74d626084612afa188bb15b036377bbac16ed5374d7fcff300e58fab8100a77729c2fa428e4294ac6b3b643feb00b2e87e7de78883c8548193de54
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
csv-utils
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.3
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.4.1)
|
5
|
+
diff-lcs (1.3)
|
6
|
+
docile (1.3.2)
|
7
|
+
inheritance-helper (0.1.5)
|
8
|
+
parallel (1.19.2)
|
9
|
+
parser (2.7.1.4)
|
10
|
+
ast (~> 2.4.1)
|
11
|
+
rainbow (3.0.0)
|
12
|
+
rake (13.0.1)
|
13
|
+
regexp_parser (1.7.1)
|
14
|
+
rexml (3.2.4)
|
15
|
+
rspec (3.9.0)
|
16
|
+
rspec-core (~> 3.9.0)
|
17
|
+
rspec-expectations (~> 3.9.0)
|
18
|
+
rspec-mocks (~> 3.9.0)
|
19
|
+
rspec-core (3.9.2)
|
20
|
+
rspec-support (~> 3.9.3)
|
21
|
+
rspec-expectations (3.9.2)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.9.0)
|
24
|
+
rspec-mocks (3.9.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-support (3.9.3)
|
28
|
+
rubocop (0.86.0)
|
29
|
+
parallel (~> 1.10)
|
30
|
+
parser (>= 2.7.0.1)
|
31
|
+
rainbow (>= 2.2.2, < 4.0)
|
32
|
+
regexp_parser (>= 1.7)
|
33
|
+
rexml
|
34
|
+
rubocop-ast (>= 0.0.3, < 1.0)
|
35
|
+
ruby-progressbar (~> 1.7)
|
36
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
37
|
+
rubocop-ast (0.0.3)
|
38
|
+
parser (>= 2.7.0.1)
|
39
|
+
ruby-progressbar (1.10.1)
|
40
|
+
simplecov (0.18.5)
|
41
|
+
docile (~> 1.1)
|
42
|
+
simplecov-html (~> 0.11)
|
43
|
+
simplecov-html (0.12.2)
|
44
|
+
unicode-display_width (1.7.0)
|
45
|
+
|
46
|
+
PLATFORMS
|
47
|
+
ruby
|
48
|
+
|
49
|
+
DEPENDENCIES
|
50
|
+
inheritance-helper
|
51
|
+
rake
|
52
|
+
rspec
|
53
|
+
rubocop
|
54
|
+
simplecov
|
55
|
+
|
56
|
+
BUNDLED WITH
|
57
|
+
1.17.3
|
data/bin/csv-change-eol
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
def bold_string(str)
|
6
|
+
"\033[1m#{str}\033[0m"
|
7
|
+
end
|
8
|
+
|
9
|
+
USAGE = "Usage: #{bold_string('csv-change-eol')} <csv_file> <end of line character sequence in hex>"
|
10
|
+
|
11
|
+
def exit_on_error(msg)
|
12
|
+
$stderr.print <<STR
|
13
|
+
Error: #{bold_string(msg)}
|
14
|
+
|
15
|
+
#{USAGE}
|
16
|
+
|
17
|
+
End of line example: '7C5E7C0A' is '|^|\\n'
|
18
|
+
- 0A is new line
|
19
|
+
- 0D is carriage return
|
20
|
+
|
21
|
+
Goto: #{bold_string('http://www.asciitable.com/')} for help with the character sequence
|
22
|
+
|
23
|
+
STR
|
24
|
+
exit 1
|
25
|
+
end
|
26
|
+
|
27
|
+
csv_file = ARGV.shift || exit_on_error('no csv file specified')
|
28
|
+
eol_sequence = ARGV.shift || exit_on_error('no EOL character sequence specified')
|
29
|
+
|
30
|
+
exit_on_error("file #{csv_file} not found") unless File.exist?(csv_file)
|
31
|
+
exit_on_error("not a HEX sequece (#{eol_sequence})") unless eol_sequence =~ /\A[0-9a-f]+\z/i
|
32
|
+
exit_on_error("incorrect number of characters in (#{eol_sequence}), should be even") unless eol_sequence.size.even?
|
33
|
+
|
34
|
+
eol_sequence = [eol_sequence].pack('H*')
|
35
|
+
|
36
|
+
|
37
|
+
escaped_csv_file =
|
38
|
+
if csv_file =~ /\.csv$/i
|
39
|
+
csv_file.sub(/(\.csv)$/i, '.escaped-eol\1')
|
40
|
+
else
|
41
|
+
csv_file + '.escaped-eol'
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
File.open(escaped_csv_file, 'wb') do |out|
|
46
|
+
CSV.foreach(csv_file) do |row|
|
47
|
+
line = row.to_csv
|
48
|
+
line.rstrip!
|
49
|
+
line.concat(eol_sequence)
|
50
|
+
out.write line
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
puts escaped_csv_file
|
data/bin/csv-find-error
CHANGED
@@ -7,8 +7,7 @@ begin
|
|
7
7
|
CSV.open(ARGV[0], 'rb').each { }
|
8
8
|
rescue CSV::MalformedCSVError => e
|
9
9
|
puts e.class.to_s + ': ' + e.message
|
10
|
-
if e.message =~ /
|
11
|
-
e.message =~ /Unclosed quoted field on line (\d+)/
|
10
|
+
if e.message =~ /line (\d+)/
|
12
11
|
lineno = $1.to_i
|
13
12
|
cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
|
14
13
|
puts "running #{cmd}"
|
data/bin/csv-readline
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'optparse'
|
4
|
+
|
3
5
|
BYTE_ORDER_MARKS = {
|
4
6
|
"\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
|
5
7
|
"\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
|
@@ -25,11 +27,12 @@ def csv_parse_line(line)
|
|
25
27
|
last_comma_pos = -1
|
26
28
|
column = 1
|
27
29
|
|
28
|
-
while pos = line.index(/(["
|
30
|
+
while pos = line.index(/([",\n])/, pos + 1)
|
29
31
|
case line[pos]
|
30
32
|
when '"'
|
31
33
|
if opened_quote
|
32
34
|
if line[pos+1] == ',' ||
|
35
|
+
line[pos+1] == "\r" ||
|
33
36
|
line[pos+1] == "\n" ||
|
34
37
|
line[pos+1].nil?
|
35
38
|
opened_quote = false
|
@@ -47,9 +50,22 @@ def csv_parse_line(line)
|
|
47
50
|
# return columns
|
48
51
|
end
|
49
52
|
else
|
50
|
-
|
53
|
+
if (last_comma_pos == -1 && pos != 0) ||
|
54
|
+
(last_comma_pos != -1 && line[pos-1] != ',')
|
55
|
+
col_end = line.index(/,/, pos + 1)
|
56
|
+
col_end ||= line.size - 1
|
57
|
+
# slice out the column value
|
58
|
+
val = line[last_comma_pos + 1, col_end - last_comma_pos - 1]
|
59
|
+
columns << [val, :stray_quote]
|
60
|
+
opened_quote = false
|
61
|
+
last_comma_pos = col_end
|
62
|
+
pos = col_end
|
63
|
+
else
|
64
|
+
opened_quote = true
|
65
|
+
end
|
51
66
|
end
|
52
|
-
when ','
|
67
|
+
when ',',
|
68
|
+
"\n"
|
53
69
|
if ! opened_quote
|
54
70
|
column += 1
|
55
71
|
columns << [line[last_comma_pos + 1, pos - last_comma_pos - 1], :ok]
|
@@ -82,6 +98,22 @@ def parse_csv_row(file, lineno, number_of_lines)
|
|
82
98
|
csv_parse_line(str)
|
83
99
|
end
|
84
100
|
|
101
|
+
options = {
|
102
|
+
all_columns: false
|
103
|
+
}
|
104
|
+
OptionParser.new do |opts|
|
105
|
+
opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file> <line number> [<number of lines>]'
|
106
|
+
|
107
|
+
opts.on('-h', '--help', 'Prints this help') do
|
108
|
+
puts opts
|
109
|
+
exit
|
110
|
+
end
|
111
|
+
|
112
|
+
opts.on('-a', '--all', 'Display all columns') do
|
113
|
+
options[:all_columns] = true
|
114
|
+
end
|
115
|
+
end.parse!
|
116
|
+
|
85
117
|
file = File.open(ARGV[0], 'rb')
|
86
118
|
lineno = ARGV[1].to_i
|
87
119
|
number_of_lines = (ARGV[2] || 1).to_i
|
@@ -96,7 +128,7 @@ file.close
|
|
96
128
|
cnt = 0
|
97
129
|
data.each do |k, (v, status)|
|
98
130
|
cnt += 1
|
99
|
-
next if empty_column?(v)
|
131
|
+
next if !options[:all_columns] && empty_column?(v)
|
100
132
|
if status == :ok
|
101
133
|
puts sprintf(' %-3d %s: %s', cnt, k, v)
|
102
134
|
else
|
data/csv-utils.gemspec
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'csv-utils'
|
5
|
-
s.version = '0.1.3'
|
5
|
+
s.version = '0.2.0'
|
6
|
+
s.licenses = ['MIT']
|
6
7
|
s.summary = 'CSV Utils'
|
7
8
|
s.description = 'Tools for debugging malformed CSV files'
|
8
9
|
s.authors = ['Doug Youch']
|
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
|
|
11
12
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
12
13
|
s.bindir = 'bin'
|
13
14
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
15
|
+
|
16
|
+
s.add_runtime_dependency 'inheritance-helper'
|
14
17
|
end
|
data/lib/csv-utils.rb
ADDED
data/lib/csv_utils/csv_options.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# Auto detect a csv files options
|
2
|
+
module CSVUtils
|
3
|
+
class CSVOptions
|
4
|
+
|
5
|
+
# this list is from https://en.wikipedia.org/wiki/Byte_order_mark
|
6
|
+
BYTE_ORDER_MARKS = {
|
7
|
+
"\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
|
8
|
+
"\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
|
9
|
+
"\xFF\xFE".force_encoding('ASCII-8BIT') => 'UTF-16',
|
10
|
+
"\x00\x00\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-32',
|
11
|
+
"\xFF\xFE\x00\x00".force_encoding('ASCII-8BIT') => 'UTF-32'
|
12
|
+
}
|
13
|
+
|
14
|
+
COL_SEPARATORS = [
|
15
|
+
"\x02",
|
16
|
+
"\t",
|
17
|
+
'|',
|
18
|
+
','
|
19
|
+
]
|
20
|
+
|
21
|
+
ROW_SEPARATORS = [
|
22
|
+
"\r\n",
|
23
|
+
"\n",
|
24
|
+
"\r"
|
25
|
+
]
|
26
|
+
|
27
|
+
attr_reader :columns,
|
28
|
+
:byte_order_mark,
|
29
|
+
:encoding,
|
30
|
+
:col_separator,
|
31
|
+
:row_separator
|
32
|
+
|
33
|
+
|
34
|
+
def initialize(io)
|
35
|
+
line =
|
36
|
+
if io.is_a?(String)
|
37
|
+
File.open(io, 'rb', &:readline)
|
38
|
+
else
|
39
|
+
io.readline
|
40
|
+
end
|
41
|
+
|
42
|
+
@col_separator = auto_detect_col_sep(line)
|
43
|
+
@row_separator = auto_detect_row_sep(line)
|
44
|
+
@byte_order_mark = get_byte_order_mark(line)
|
45
|
+
@encoding = get_character_encoding(@byte_order_mark)
|
46
|
+
@columns = get_number_of_columns(line) if @col_separator
|
47
|
+
end
|
48
|
+
|
49
|
+
def valid?
|
50
|
+
return false if @col_separator.nil? || @row_separator.nil?
|
51
|
+
|
52
|
+
true
|
53
|
+
end
|
54
|
+
|
55
|
+
def auto_detect_col_sep(line)
|
56
|
+
COL_SEPARATORS.detect { |sep| line.include?(sep) }
|
57
|
+
end
|
58
|
+
|
59
|
+
def auto_detect_row_sep(line)
|
60
|
+
ROW_SEPARATORS.detect { |sep| line.include?(sep) }
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_headers(line)
|
64
|
+
headers = line.split(col_separator)
|
65
|
+
headers[0] = strip_byte_order_marks(headers[0])
|
66
|
+
headers
|
67
|
+
end
|
68
|
+
|
69
|
+
def get_number_of_columns(line)
|
70
|
+
get_headers(line).size
|
71
|
+
end
|
72
|
+
|
73
|
+
def get_byte_order_mark(line)
|
74
|
+
BYTE_ORDER_MARKS.keys.detect do |bom|
|
75
|
+
line =~ /\A#{bom}/
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def get_character_encoding(bom)
|
80
|
+
BYTE_ORDER_MARKS[bom] || 'UTF-8'
|
81
|
+
end
|
82
|
+
|
83
|
+
def strip_byte_order_marks(header)
|
84
|
+
@byte_order_marks ? header.sub(@byte_order_marks, '') : header
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/csv_utils/csv_report.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Builds a csv file from csv rows
|
2
|
+
module CSVUtils
|
3
|
+
class CSVReport
|
4
|
+
attr_reader :csv,
|
5
|
+
:must_close
|
6
|
+
|
7
|
+
def initialize(csv, csv_options = {}, &block)
|
8
|
+
@csv =
|
9
|
+
if csv.is_a?(String)
|
10
|
+
@must_close = true
|
11
|
+
mode = csv_options.delete(:mode) || 'wb'
|
12
|
+
CSV.open(csv, mode, csv_options)
|
13
|
+
else
|
14
|
+
@must_close = false
|
15
|
+
csv
|
16
|
+
end
|
17
|
+
|
18
|
+
generate(&block) if block
|
19
|
+
end
|
20
|
+
|
21
|
+
def generate
|
22
|
+
yield self
|
23
|
+
@csv.close if @must_close
|
24
|
+
end
|
25
|
+
|
26
|
+
def append(csv_row)
|
27
|
+
@csv <<
|
28
|
+
if csv_row.is_a?(Array)
|
29
|
+
csv_row
|
30
|
+
else
|
31
|
+
csv_row.to_a
|
32
|
+
end
|
33
|
+
end
|
34
|
+
alias << append
|
35
|
+
|
36
|
+
def add_headers(csv_row)
|
37
|
+
append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/csv_utils/csv_row.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'inheritance-helper'
|
2
|
+
|
3
|
+
module CSVUtils
|
4
|
+
module CSVRow
|
5
|
+
def self.included(base)
|
6
|
+
base.extend InheritanceHelper::Methods
|
7
|
+
base.extend ClassMethods
|
8
|
+
end
|
9
|
+
|
10
|
+
module ClassMethods
|
11
|
+
def csv_columns
|
12
|
+
{}
|
13
|
+
end
|
14
|
+
|
15
|
+
def csv_column(header, options = {}, &block)
|
16
|
+
options[:header] ||= header.to_s
|
17
|
+
|
18
|
+
if block
|
19
|
+
options[:proc] = block
|
20
|
+
elsif options[:proc].nil?
|
21
|
+
options[:method] ||= header
|
22
|
+
end
|
23
|
+
|
24
|
+
add_value_to_class_method(:csv_columns, header => options)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def csv_headers
|
29
|
+
self.class.csv_columns.values.map { |column_options| csv_column_header(column_options) }
|
30
|
+
end
|
31
|
+
|
32
|
+
def csv_row
|
33
|
+
self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
|
34
|
+
end
|
35
|
+
alias_method :to_a, :csv_row
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def csv_column_header(column_options)
|
40
|
+
column_options[:header]
|
41
|
+
end
|
42
|
+
|
43
|
+
def csv_column_value(column_options)
|
44
|
+
if column_options[:proc]
|
45
|
+
instance_eval(&column_options[:proc])
|
46
|
+
else
|
47
|
+
send(column_options[:method])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/script/console
ADDED
metadata
CHANGED
@@ -1,33 +1,59 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
12
|
-
dependencies:
|
11
|
+
date: 2020-06-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: inheritance-helper
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
description: Tools for debugging malformed CSV files
|
14
28
|
email: dougyouch@gmail.com
|
15
29
|
executables:
|
30
|
+
- csv-change-eol
|
16
31
|
- csv-find-error
|
17
32
|
- csv-readline
|
18
33
|
extensions: []
|
19
34
|
extra_rdoc_files: []
|
20
35
|
files:
|
21
36
|
- ".gitignore"
|
37
|
+
- ".ruby-gemset"
|
38
|
+
- ".ruby-version"
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
22
41
|
- LICENSE
|
23
42
|
- README.md
|
43
|
+
- bin/csv-change-eol
|
24
44
|
- bin/csv-find-error
|
25
45
|
- bin/csv-readline
|
26
46
|
- csv-utils.gemspec
|
47
|
+
- lib/csv-utils.rb
|
48
|
+
- lib/csv_utils/csv_options.rb
|
49
|
+
- lib/csv_utils/csv_report.rb
|
50
|
+
- lib/csv_utils/csv_row.rb
|
51
|
+
- script/console
|
27
52
|
homepage: https://github.com/dougyouch/csv-utils
|
28
|
-
licenses:
|
53
|
+
licenses:
|
54
|
+
- MIT
|
29
55
|
metadata: {}
|
30
|
-
post_install_message:
|
56
|
+
post_install_message:
|
31
57
|
rdoc_options: []
|
32
58
|
require_paths:
|
33
59
|
- lib
|
@@ -42,9 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
42
68
|
- !ruby/object:Gem::Version
|
43
69
|
version: '0'
|
44
70
|
requirements: []
|
45
|
-
|
46
|
-
|
47
|
-
signing_key:
|
71
|
+
rubygems_version: 3.0.8
|
72
|
+
signing_key:
|
48
73
|
specification_version: 4
|
49
74
|
summary: CSV Utils
|
50
75
|
test_files: []
|