csv-utils 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +52 -0
- data/LICENSE +21 -0
- data/README.md +2 -0
- data/bin/csv-find-error +19 -0
- data/bin/csv-readline +105 -0
- data/csv-utils.gemspec +14 -0
- metadata +11 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c958e28320635ac949c0e7091377dc7d261a7cd9
|
4
|
+
data.tar.gz: '0396510b65f51d2caaf20bb8749335e725331f91'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 121cca84be39ac5fbaf38441ce5cea4a689b7e29ec29b5b0e020d1db76c37d9fd13ee5c16c272884e6f2b9cd98bcc16ea35195fb0707e9492c0f5218aae9a2c9
|
7
|
+
data.tar.gz: 45bc513f2a3a86776341e5a6acfaee64ce0fa2d5687dcb22cdd19883eace0ace13a1343e13a8992027ad7fa9ee1ae7c8dc564c9ed2a9dc0bba5ab55d970cc750
|
data/.gitignore
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
|
52
|
+
*~
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2019 dougyouch
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
data/bin/csv-find-error
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'csv'
require 'shellwords'

# Reads the CSV file named by the first argument from start to finish.
# Prints "CSV file is ok" when it parses cleanly. On a malformed row, prints
# the parser error, runs csv-readline on the reported line, and exits with
# status 1.

path = ARGV[0]
abort 'usage: csv-find-error FILE' unless path

begin
  # The block form closes the file even when parsing raises part-way through.
  CSV.open(path, 'rb') { |csv| csv.each { |_row| } }
rescue CSV::MalformedCSVError => e
  puts e.class.to_s + ': ' + e.message
  # The wording of the message varies between csv releases ("... in line N",
  # "... on line N"), so only the line number itself is extracted.
  lineno = e.message[/\b(?:in|on) line (\d+)/, 1]
  if lineno
    puts "running csv-readline #{Shellwords.escape(path)} #{lineno}"
    # Passing the arguments separately avoids going through a shell.
    system('csv-readline', path, lineno)
  end
  exit 1
end

puts 'CSV file is ok'
|
data/bin/csv-readline
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Byte order marks (as binary strings) mapped to the encoding they announce.
# The four-byte UTF-32 marks come before the two-byte UTF-16 marks because
# the UTF-32 LE mark (FF FE 00 00) begins with the UTF-16 LE mark (FF FE);
# testing the short one first would strip only half of the long one.
BYTE_ORDER_MARKS = {
  "\xEF\xBB\xBF".b => 'UTF-8',
  "\x00\x00\xFE\xFF".b => 'UTF-32',
  "\xFF\xFE\x00\x00".b => 'UTF-32',
  "\xFE\xFF".b => 'UTF-16',
  "\xFF\xFE".b => 'UTF-16'
}.freeze

# Removes a leading byte order mark from +line+.
#
# The comparison is done on raw bytes, so the string may carry any encoding.
#
# @param line [String] first line of a file
# @return [String] +line+ without its byte order mark, or +line+ itself when
#   it does not start with one
def strip_byte_order_mark(line)
  bytes = line.b
  bom = BYTE_ORDER_MARKS.keys.find { |mark| bytes.start_with?(mark) }
  return line unless bom

  line.byteslice(bom.bytesize..-1)
end
|
19
|
+
|
20
|
+
# Splits raw CSV text into columns without unquoting them, flagging columns
# whose quoting looks broken.
#
# A comma ends a column only outside a quoted section. Inside a quoted
# section a quote followed by a comma, a line ending or the end of the text
# closes the section, and a doubled quote is an escaped quote. Any other
# quote inside a quoted section marks the column as having a stray quote; that
# column then runs up to the next comma (or the end of the text).
#
# @param line [String] one or more physical lines of a CSV file
# @return [Array<Array(String, Symbol)>] one [raw_value, status] pair per
#   column; status is :ok, :stray_quote, or :unclosed_quote when the text
#   ends inside a quoted section
def csv_parse_line(line)
  columns = []
  opened_quote = false
  pos = -1
  field_start = 0 # index of the first character of the current column

  while (pos = line.index(/[",]/, pos + 1))
    if line[pos] == ','
      next if opened_quote # a comma inside quotes is data

      columns << [line[field_start...pos], :ok]
      field_start = pos + 1
    elsif !opened_quote
      opened_quote = true
    else
      case line[pos + 1]
      when ',', "\n", "\r", nil
        opened_quote = false
      when '"'
        pos += 1 # correctly escaped
      else
        comma = line.index(',', pos + 1)
        col_end = comma || line.size
        val = line[field_start...col_end]
        val = val.chomp unless comma # last column: drop the line ending
        columns << [val, :stray_quote]
        opened_quote = false
        field_start = col_end + 1
        pos = col_end
      end
    end
  end

  # Whatever follows the last separator is the final column. It is skipped
  # when a stray-quote column already consumed the rest of the text, and when
  # the text held no columns at all.
  if field_start <= line.size
    rest = line[field_start..-1].chomp
    unless columns.empty? && rest.empty?
      columns << [rest, opened_quote ? :unclosed_quote : :ok]
    end
  end

  columns
end
|
63
|
+
|
64
|
+
# Tells whether a raw column value carries no data.
#
# A value counts as empty when it is nil, blank, or nothing but a pair of
# quotes around optional whitespace (for example +""+ or +" "+), with or
# without whitespace around the quotes.
#
# @param col [String, nil] raw column text, quotes included
# @return [Boolean]
def empty_column?(col)
  return true if col.nil?

  stripped = col.strip
  stripped.empty? || stripped.match?(/\A"\s*"\z/)
end
|
71
|
+
|
72
|
+
# Collects physical lines from +file+ and parses them as one CSV row.
#
# Counting starts at 1 and is incremented before each line is looked at, so
# the first line yielded by +file+ counts as line 2; the only caller reads
# the header line from +file+ before calling this method.
#
# @param file [#each] open file positioned after the header line
# @param lineno [Integer] number of the first line to collect
# @param number_of_lines [Integer] how many physical lines to collect; a
#   value below 1 is treated like 1 instead of reading to the end of the file
# @return [Array] whatever csv_parse_line returns for the collected text
def parse_csv_row(file, lineno, number_of_lines)
  cnt = 1
  str = +''
  file.each do |line|
    cnt += 1
    next if cnt < lineno

    str << line
    number_of_lines -= 1
    break if number_of_lines <= 0
  end
  csv_parse_line(str)
end
|
84
|
+
|
85
|
+
# Entry point: prints the non-empty columns of one CSV row next to the header
# names taken from the first line of the file.
#
# Usage: csv-readline FILE LINENO [NUMBER_OF_LINES]
path = ARGV[0]
lineno = ARGV[1].to_i
number_of_lines = (ARGV[2] || 1).to_i

# Validate the arguments before touching the file so nothing is left open
# when they are rejected.
abort 'usage: csv-readline FILE LINENO [NUMBER_OF_LINES]' unless path
raise "no lineno specified" unless lineno > 0

# The block form closes the file even if reading or parsing raises.
data = File.open(path, 'rb') do |file|
  headers = strip_byte_order_mark(file.readline.strip).split(',')
  headers.zip(parse_csv_row(file, lineno, number_of_lines))
end

# cnt is the 1-based column position; it keeps counting across skipped
# (empty) columns so the printed numbers match the header positions.
data.each.with_index(1) do |(k, (v, status)), cnt|
  next if empty_column?(v)

  if status == :ok
    puts format(' %-3d %s: %s', cnt, k, v)
  else
    puts format('* %-3d (%s) %s: %s', cnt, status.to_s.gsub('_', ' '), k, v)
  end
end
|
data/csv-utils.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true

# Gem specification for csv-utils. The packaged files are taken from
# `git ls-files`, minus test/spec/features directories, and every file under
# bin/ is installed as an executable.
Gem::Specification.new do |s|
  s.name        = 'csv-utils'
  s.version     = '0.1.1'
  s.summary     = 'CSV Utils'
  s.description = 'Tools for debugging malformed CSV files'
  s.authors     = ['Doug Youch']
  s.email       = 'dougyouch@gmail.com'
  s.homepage    = 'https://github.com/dougyouch/csv-utils'
  # The repository ships an MIT LICENSE file; declare it in the metadata too.
  s.license     = 'MIT'
  s.files       = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  s.bindir      = 'bin'
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
@@ -12,10 +12,18 @@ date: 2019-04-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
13
13
|
description: Tools for debugging malformed CSV files
|
14
14
|
email: dougyouch@gmail.com
|
15
|
-
executables: []
|
15
|
+
executables:
|
16
|
+
- csv-find-error
|
17
|
+
- csv-readline
|
16
18
|
extensions: []
|
17
19
|
extra_rdoc_files: []
|
18
|
-
files: []
|
20
|
+
files:
|
21
|
+
- ".gitignore"
|
22
|
+
- LICENSE
|
23
|
+
- README.md
|
24
|
+
- bin/csv-find-error
|
25
|
+
- bin/csv-readline
|
26
|
+
- csv-utils.gemspec
|
19
27
|
homepage: https://github.com/dougyouch/csv-utils
|
20
28
|
licenses: []
|
21
29
|
metadata: {}
|