csv-utils 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4c8639f4900acf07c457131e734aea972f40c7c9
4
- data.tar.gz: 4fc11860e852214625000f44603c02775457f6ae
3
+ metadata.gz: c958e28320635ac949c0e7091377dc7d261a7cd9
4
+ data.tar.gz: '0396510b65f51d2caaf20bb8749335e725331f91'
5
5
  SHA512:
6
- metadata.gz: fe52c73a4b6f5c8f5463b28236756c8763978f8374a55abc44cedc022dfd6be86ca40f005ea95d8df84f64d95b7e1667d088c0ef728fc90f221e73fd6694f881
7
- data.tar.gz: 2a8424c58b636680965fb0f913d0bbdbcc2c2ed7661ec77474111ccd3494b5c217ab8c7130be5268ae1128c926e8c82f107577999d292b3955776fedb8e37813
6
+ metadata.gz: 121cca84be39ac5fbaf38441ce5cea4a689b7e29ec29b5b0e020d1db76c37d9fd13ee5c16c272884e6f2b9cd98bcc16ea35195fb0707e9492c0f5218aae9a2c9
7
+ data.tar.gz: 45bc513f2a3a86776341e5a6acfaee64ce0fa2d5687dcb22cdd19883eace0ace13a1343e13a8992027ad7fa9ee1ae7c8dc564c9ed2a9dc0bba5ab55d970cc750
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ *~
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 dougyouch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # csv-utils
2
+ CSV Utilities for manipulating csv files in code
data/bin/csv-find-error ADDED
@@ -0,0 +1,19 @@
1
#!/usr/bin/env ruby

# Parses the CSV file named by the first command-line argument from start to
# finish. Prints 'CSV file is ok' when parsing succeeds. When the parser raises
# CSV::MalformedCSVError the error is printed, and if the message identifies a
# "Missing or stray quote" line, csv-readline is run on that line to show the
# offending columns. Exits with status 1 on a malformed file.

require 'csv'
require 'shellwords'

path = ARGV[0]
raise 'no file specified' unless path

begin
  # Block form so the file handle is closed even when parsing raises.
  CSV.open(path, 'rb') { |csv| csv.each { } }
rescue CSV::MalformedCSVError => e
  puts "#{e.class}: #{e.message}"
  if (match = e.message.match(/Missing or stray quote in line (\d+)/))
    lineno = match[1].to_i
    # The path is shell-escaped because the command is run through a shell.
    cmd = "csv-readline #{Shellwords.escape(path)} #{lineno}"
    puts "running #{cmd}"
    system(cmd)
  end
  exit 1
end

puts 'CSV file is ok'
data/bin/csv-readline ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Byte order marks, as raw bytes, mapped to the encoding family they announce.
BYTE_ORDER_MARKS = {
  "\xEF\xBB\xBF".b => 'UTF-8',
  "\xFE\xFF".b => 'UTF-16',
  "\xFF\xFE".b => 'UTF-16',
  "\x00\x00\xFE\xFF".b => 'UTF-32',
  "\xFF\xFE\x00\x00".b => 'UTF-32'
}.freeze

# Removes a leading byte order mark from +line+.
#
# Matching is done on raw bytes, so it works for strings in any encoding.
# When several marks match, the longest one wins: the UTF-32 LE mark
# (FF FE 00 00) begins with the UTF-16 LE mark (FF FE), and stripping only the
# shorter one would leave two NUL bytes at the start of the line.
#
# @param line [String] first line of a file, possibly starting with a BOM
# @return [String] +line+ itself when no BOM is present, otherwise a new
#   string (same encoding as +line+) with the BOM bytes removed
def strip_byte_order_mark(line)
  bytes = line.b
  bom = BYTE_ORDER_MARKS.keys
                        .select { |mark| bytes.start_with?(mark) }
                        .max_by(&:bytesize)
  return line unless bom

  line.byteslice(bom.bytesize, line.bytesize - bom.bytesize)
end
19
+
20
# Splits a raw CSV row into columns with a forgiving, quote-aware scan and
# flags columns whose quoting looks broken. Column values are returned exactly
# as they appear in the input, including any surrounding quotes.
#
# Inside a quoted column a quote is treated as:
# * the closing quote when followed by a comma, a line terminator or the end
#   of the input;
# * an escaped quote when followed by another quote ("");
# * a stray quote otherwise, in which case the column is taken to end at the
#   next comma and is flagged.
#
# @param line [String] one CSV row (may contain embedded newlines)
# @return [Array<Array(String, Symbol)>] one [value, status] pair per column,
#   where status is :ok or :stray_quote; empty input yields []
def csv_parse_line(line)
  columns = []
  return columns if line.empty?

  opened_quote = false
  pos = -1
  # Index of the delimiter that precedes the current column. It starts at -1
  # (just before the line) so the first column keeps its first character.
  last_comma_pos = -1

  while (pos = line.index(/[",]/, pos + 1))
    if line[pos] == ','
      # Commas inside a quoted column are data, not delimiters.
      next if opened_quote

      columns << [line[(last_comma_pos + 1)...pos], :ok]
      last_comma_pos = pos
    elsif !opened_quote
      opened_quote = true
    else
      following = line[pos + 1]
      if following.nil? || following == ',' || following == "\n" || following == "\r"
        # "\r" is accepted so CRLF-terminated rows close their quote normally.
        opened_quote = false
      elsif following == '"'
        pos += 1 # correctly escaped quote; skip its partner
      else
        col_end = line.index(',', pos + 1)
        val = if col_end
                line[(last_comma_pos + 1)...col_end]
              else
                line[(last_comma_pos + 1)..-1].chomp
              end
        # No further comma: the broken column runs to the end of the input.
        col_end ||= line.size
        columns << [val, :stray_quote]
        opened_quote = false
        last_comma_pos = col_end
        pos = col_end
      end
    end
  end

  # Emit the final column unless a stray-quote column already consumed the
  # rest of the input. A quote still open here was never terminated.
  if last_comma_pos < line.size
    status = opened_quote ? :stray_quote : :ok
    columns << [line[(last_comma_pos + 1)..-1].chomp, status]
  end

  columns
end
63
+
64
# Tells whether a raw column value carries no data: nil, empty or
# whitespace-only text, or a quoted value that is empty ("") or a single
# space (" ").
#
# @param col [String, nil] raw column text
# @return [Boolean]
def empty_column?(col)
  return true if col.nil?
  return true if col.strip.empty?

  ['""', '" "'].include?(col)
end
71
+
72
# Collects +number_of_lines+ consecutive lines from +file+, beginning at line
# number +lineno+, and parses the concatenated text with csv_parse_line.
#
# Counting starts at 2 for the first line yielded by +file+, i.e. one line is
# treated as already consumed before this method is called.
#
# @param file [#each] line-yielding IO positioned after the first line
# @param lineno [Integer] line number at which collection starts
# @param number_of_lines [Integer] how many lines to collect
# @return whatever csv_parse_line returns for the collected text
def parse_csv_row(file, lineno, number_of_lines)
  remaining = number_of_lines
  buffer = +''
  current = 1

  file.each do |row|
    current += 1
    if current >= lineno
      buffer << row
      remaining -= 1
      break if remaining.zero?
    end
  end

  csv_parse_line(buffer)
end
84
+
85
# Command line: csv-readline FILE LINENO [NUMBER_OF_LINES]
#
# Reads the first line of FILE as a comma-separated header row, parses the
# requested line(s), and prints every non-empty column next to its header.
# Columns whose status is not :ok are prefixed with '*' and the status name.
path = ARGV[0]
lineno = ARGV[1].to_i
number_of_lines = (ARGV[2] || 1).to_i

# Validate the arguments before touching the file so nothing is left open.
raise 'no file specified' unless path
raise "no lineno specified" unless lineno > 0

# Block form closes the file even if reading or parsing raises.
data = File.open(path, 'rb') do |file|
  headers = strip_byte_order_mark(file.readline.strip).split(',')
  headers.zip(parse_csv_row(file, lineno, number_of_lines))
end

# Headers without a matching column pair with nil and are skipped as empty.
data.each.with_index(1) do |(k, (v, status)), cnt|
  next if empty_column?(v)

  if status == :ok
    puts format(' %-3d %s: %s', cnt, k, v)
  else
    puts format('* %-3d (%s) %s: %s', cnt, status.to_s.gsub('_', ' '), k, v)
  end
end
data/csv-utils.gemspec ADDED
@@ -0,0 +1,14 @@
1
# frozen_string_literal: true

# Gem specification for csv-utils.
Gem::Specification.new do |s|
  s.name = 'csv-utils'
  s.version = '0.1.1'
  s.summary = 'CSV Utils'
  s.description = 'Tools for debugging malformed CSV files'
  s.authors = ['Doug Youch']
  s.email = 'dougyouch@gmail.com'
  s.homepage = 'https://github.com/dougyouch/csv-utils'
  # Declared to match the MIT LICENSE file shipped with the gem.
  s.license = 'MIT'
  # Package every git-tracked file except test, spec and feature directories.
  s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  s.bindir = 'bin'
  # Every tracked file under bin/ is installed as an executable.
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -12,10 +12,18 @@ date: 2019-04-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Tools for debugging malformed CSV files
14
14
  email: dougyouch@gmail.com
15
- executables: []
15
+ executables:
16
+ - csv-find-error
17
+ - csv-readline
16
18
  extensions: []
17
19
  extra_rdoc_files: []
18
- files: []
20
+ files:
21
+ - ".gitignore"
22
+ - LICENSE
23
+ - README.md
24
+ - bin/csv-find-error
25
+ - bin/csv-readline
26
+ - csv-utils.gemspec
19
27
  homepage: https://github.com/dougyouch/csv-utils
20
28
  licenses: []
21
29
  metadata: {}