csv-utils 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4c8639f4900acf07c457131e734aea972f40c7c9
4
- data.tar.gz: 4fc11860e852214625000f44603c02775457f6ae
3
+ metadata.gz: c958e28320635ac949c0e7091377dc7d261a7cd9
4
+ data.tar.gz: '0396510b65f51d2caaf20bb8749335e725331f91'
5
5
  SHA512:
6
- metadata.gz: fe52c73a4b6f5c8f5463b28236756c8763978f8374a55abc44cedc022dfd6be86ca40f005ea95d8df84f64d95b7e1667d088c0ef728fc90f221e73fd6694f881
7
- data.tar.gz: 2a8424c58b636680965fb0f913d0bbdbcc2c2ed7661ec77474111ccd3494b5c217ab8c7130be5268ae1128c926e8c82f107577999d292b3955776fedb8e37813
6
+ metadata.gz: 121cca84be39ac5fbaf38441ce5cea4a689b7e29ec29b5b0e020d1db76c37d9fd13ee5c16c272884e6f2b9cd98bcc16ea35195fb0707e9492c0f5218aae9a2c9
7
+ data.tar.gz: 45bc513f2a3a86776341e5a6acfaee64ce0fa2d5687dcb22cdd19883eace0ace13a1343e13a8992027ad7fa9ee1ae7c8dc564c9ed2a9dc0bba5ab55d970cc750
data/.gitignore ADDED
@@ -0,0 +1,52 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /spec/examples.txt
9
+ /test/tmp/
10
+ /test/version_tmp/
11
+ /tmp/
12
+
13
+ # Used by dotenv library to load environment variables.
14
+ # .env
15
+
16
+ ## Specific to RubyMotion:
17
+ .dat*
18
+ .repl_history
19
+ build/
20
+ *.bridgesupport
21
+ build-iPhoneOS/
22
+ build-iPhoneSimulator/
23
+
24
+ ## Specific to RubyMotion (use of CocoaPods):
25
+ #
26
+ # We recommend against adding the Pods directory to your .gitignore. However
27
+ # you should judge for yourself, the pros and cons are mentioned at:
28
+ # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29
+ #
30
+ # vendor/Pods/
31
+
32
+ ## Documentation cache and generated files:
33
+ /.yardoc/
34
+ /_yardoc/
35
+ /doc/
36
+ /rdoc/
37
+
38
+ ## Environment normalization:
39
+ /.bundle/
40
+ /vendor/bundle
41
+ /lib/bundler/man/
42
+
43
+ # for a library or gem, you might want to ignore these files since the code is
44
+ # intended to run in multiple environments; otherwise, check them in:
45
+ # Gemfile.lock
46
+ # .ruby-version
47
+ # .ruby-gemset
48
+
49
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
+ .rvmrc
51
+
52
+ *~
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 dougyouch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,2 @@
1
+ # csv-utils
2
+ CSV utilities for manipulating CSV files in code
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+ require 'shellwords'
5
+
6
# Reads every row of the CSV file named by the first command-line argument.
# If the parser raises CSV::MalformedCSVError, the error is printed; when the
# message names a line for a missing or stray quote, csv-readline is run on
# that line to show the offending row. Exits with status 1 on a malformed
# file, otherwise prints a confirmation.
abort 'usage: csv-find-error FILE' if ARGV.empty?

begin
  # Block form closes the file handle even when parsing raises part-way.
  CSV.open(ARGV[0], 'rb') do |csv|
    csv.each { |_row| }
  end
rescue CSV::MalformedCSVError => e
  puts "#{e.class}: #{e.message}"
  if e.message =~ /Missing or stray quote in line (\d+)/
    lineno = Regexp.last_match(1).to_i
    # The path is shell-escaped because the command is run through a shell.
    cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
    puts "running #{cmd}"
    system(cmd)
  end
  exit 1
end

puts 'CSV file is ok'
data/bin/csv-readline ADDED
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Byte order marks, as binary strings, mapped to the encoding family each
# one announces. `String#b` is used instead of `force_encoding` so the
# literals may be frozen.
BYTE_ORDER_MARKS = {
  "\xEF\xBB\xBF".b => 'UTF-8',
  "\xFE\xFF".b => 'UTF-16',
  "\xFF\xFE".b => 'UTF-16',
  "\x00\x00\xFE\xFF".b => 'UTF-32',
  "\xFF\xFE\x00\x00".b => 'UTF-32'
}.freeze

# Removes a leading byte order mark from +line+.
#
# The comparison is done on raw bytes, so +line+ may be in any encoding.
# When several marks match (the UTF-16 LE mark is a prefix of the UTF-32 LE
# mark) the longest one is removed, so no stray NUL bytes are left behind.
#
# @param line [String] first line of a file
# @return [String] +line+ itself when it has no byte order mark, otherwise a
#   new string in the same encoding with the mark removed
def strip_byte_order_mark(line)
  bytes = line.b
  bom = BYTE_ORDER_MARKS.keys
                        .select { |mark| bytes.start_with?(mark) }
                        .max_by(&:bytesize)
  return line unless bom

  line.byteslice(bom.bytesize, line.bytesize)
end
19
+
20
# Splits one raw CSV row into columns while tolerating malformed quoting.
#
# Column values are returned exactly as they appear in +line+, surrounding
# quotes included. Each value is paired with a status:
#
# * +:ok+             - the column was delimited normally
# * +:stray_quote+    - a quote inside a quoted column was followed by
#                       something other than a comma, a line ending or
#                       another quote; the column is cut at the next comma
# * +:unclosed_quote+ - the final column was still inside quotes when the
#                       input ended
#
# @param line [String] raw row text, optionally ending in a line terminator
# @return [Array<Array(String, Symbol)>] +[value, status]+ pairs, one per
#   column; empty when +line+ holds no content
def csv_parse_line(line)
  columns = []
  line_end = line.chomp.size # index just past the last content character
  return columns if line_end.zero?

  in_quotes = false
  field_start = 0 # index of the first character of the current column
  pos = -1

  while (pos = line.index(/[",]/, pos + 1))
    if line[pos] == ','
      # A comma inside quotes is data, not a separator.
      unless in_quotes
        columns << [line[field_start...pos], :ok]
        field_start = pos + 1
      end
    elsif !in_quotes
      in_quotes = true
    else
      following = line[pos + 1]
      if following.nil? || following == ',' || following == "\n" || following == "\r"
        in_quotes = false
      elsif following == '"'
        pos += 1 # doubled quote: a correctly escaped quote character
      else
        # Stray quote: report everything up to the next comma (or the end of
        # the content) as one suspicious column and resume after it.
        col_end = line.index(',', pos + 1) || line_end
        columns << [line[field_start...col_end], :stray_quote]
        in_quotes = false
        field_start = col_end + 1
        pos = col_end
      end
    end
  end

  # The text after the last separator is the final column, unless a stray
  # quote column already consumed the rest of the line.
  if field_start <= line_end
    columns << [line[field_start...line_end], in_quotes ? :unclosed_quote : :ok]
  end

  columns
end
63
+
64
# Tells whether a raw column value carries no visible content.
#
# A column is empty when it is nil, consists only of whitespace, or is a
# quoted value whose content is only whitespace (for example +""+ or
# +"   "+). A quoted value containing an escaped quote is not empty.
#
# @param col [String, nil] raw column text, quotes included
# @return [Boolean]
def empty_column?(col)
  return true if col.nil?

  stripped = col.strip
  return true if stripped.empty?

  # Quoted value: look at what sits between the outer quotes.
  stripped.size >= 2 &&
    stripped.start_with?('"') &&
    stripped.end_with?('"') &&
    stripped[1..-2].strip.empty?
end
71
+
72
# Reads the raw text of a row from +file+ and parses it into columns.
#
# Line numbering assumes the first line of the file (line 1) has already
# been consumed from +file+, so the next line read is line 2. Lines before
# +lineno+ are skipped; then +number_of_lines+ lines are concatenated (a row
# with embedded newlines spans several lines) and handed to csv_parse_line.
#
# @param file [#each] line source positioned after the first line
# @param lineno [Integer] 1-based line number where the row starts
# @param number_of_lines [Integer] how many physical lines the row spans;
#   values below 1 are treated as 1 so the read is always bounded
# @return [Array] whatever csv_parse_line returns for the collected text
def parse_csv_row(file, lineno, number_of_lines)
  remaining = [number_of_lines, 1].max
  current = 1 # the first line was read by the caller
  row_text = +''

  file.each do |line|
    current += 1
    next if current < lineno

    row_text << line
    remaining -= 1
    break if remaining.zero?
  end

  csv_parse_line(row_text)
end
84
+
85
# Prints the non-empty columns of one CSV row next to their header names.
#
# Arguments: FILE LINENO [NUMBER_OF_LINES]
#   FILE            - path of the CSV file; its first line supplies headers
#   LINENO          - 1-based line number where the row starts
#   NUMBER_OF_LINES - physical lines the row spans (defaults to 1)
#
# Columns whose status is not :ok are prefixed with "*" and their status.
path = ARGV[0]
lineno = ARGV[1].to_i
number_of_lines = (ARGV[2] || 1).to_i

# Validate before touching the file so nothing is left open on failure.
raise "no lineno specified" unless lineno > 0

# Block form closes the file even if reading or parsing raises.
data = File.open(path, 'rb') do |file|
  # Remove the byte order mark before stripping whitespace so the mark's own
  # bytes are never mistaken for strippable characters.
  headers = strip_byte_order_mark(file.readline).strip.split(',')
  headers.zip(parse_csv_row(file, lineno, number_of_lines))
end

data.each.with_index(1) do |(k, (v, status)), cnt|
  next if empty_column?(v)

  if status == :ok
    puts format(' %-3d %s: %s', cnt, k, v)
  else
    puts format('* %-3d (%s) %s: %s', cnt, status.to_s.gsub('_', ' '), k, v)
  end
end
data/csv-utils.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem specification for csv-utils. Packaged files come from `git ls-files`
# minus test/spec/features directories; every file under bin/ is installed
# as an executable.
Gem::Specification.new do |s|
  s.name        = 'csv-utils'
  s.version     = '0.1.1'
  s.summary     = 'CSV Utils'
  s.description = 'Tools for debugging malformed CSV files'
  s.authors     = ['Doug Youch']
  s.email       = 'dougyouch@gmail.com'
  s.homepage    = 'https://github.com/dougyouch/csv-utils'
  # Matches the MIT LICENSE file shipped with the gem.
  s.license     = 'MIT'
  s.files       = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  s.bindir      = 'bin'
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
@@ -12,10 +12,18 @@ date: 2019-04-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Tools for debugging malformed CSV files
14
14
  email: dougyouch@gmail.com
15
- executables: []
15
+ executables:
16
+ - csv-find-error
17
+ - csv-readline
16
18
  extensions: []
17
19
  extra_rdoc_files: []
18
- files: []
20
+ files:
21
+ - ".gitignore"
22
+ - LICENSE
23
+ - README.md
24
+ - bin/csv-find-error
25
+ - bin/csv-readline
26
+ - csv-utils.gemspec
19
27
  homepage: https://github.com/dougyouch/csv-utils
20
28
  licenses: []
21
29
  metadata: {}