csv-utils 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d2ec5906d30d2be8c744e4311fc0c7af0f58a5f4
4
- data.tar.gz: bcd816369f3e2698e30b372e3dc9c5a74a30b02f
2
+ SHA256:
3
+ metadata.gz: ecb75f60c8e9b9db4cc3eb0e4ca3a0ac53aad67726ed995b7e8c341cd0dc76a3
4
+ data.tar.gz: 5138b5cc82eec0b7667c9e3435c2662bbfa1de51e469582372a158b943e57d7f
5
5
  SHA512:
6
- metadata.gz: 44b66f060cbba4c1c2d48d62e40f6b17d437334633199e4de22988100dbd74b4bba92b9c03247bd284c562bf57b1ffdc0823e05d4591781f2cb01882fdaecc55
7
- data.tar.gz: 14aba3fd327f3f03d9e82a3a189791aa1cc84985cd78fbb752dac8d03373868af92919a8e87e900e6e4765c3e41291a2203515cc4fc365c605fd4639df60fd59
6
+ metadata.gz: 1a7d685b0db28805833596b32793fca968c6a5a1f57346223b487579622423d0151a122253c69806e377015fe0cf9cf02381bafbee37cb0ba54fa290a857c1cc
7
+ data.tar.gz: 7c572f9e7c74d626084612afa188bb15b036377bbac16ed5374d7fcff300e58fab8100a77729c2fa428e4294ac6b3b643feb00b2e87e7de78883c8548193de54
@@ -0,0 +1 @@
1
+ csv-utils
@@ -0,0 +1 @@
1
+ 2.6.3
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# frozen_string_literal: true

# Fetch gems over TLS; a plain-http source lets anyone on the network path
# tamper with the downloaded packages.
source 'https://rubygems.org'

# Runtime dependency (also declared in the gemspec).
gem 'inheritance-helper'

# Tooling used while developing the gem.
group :development do
  gem 'rake'
  gem 'rubocop'
end

# Test-suite dependencies.
group :spec do
  gem 'rspec'
  gem 'simplecov'
end
@@ -0,0 +1,57 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ ast (2.4.1)
5
+ diff-lcs (1.3)
6
+ docile (1.3.2)
7
+ inheritance-helper (0.1.5)
8
+ parallel (1.19.2)
9
+ parser (2.7.1.4)
10
+ ast (~> 2.4.1)
11
+ rainbow (3.0.0)
12
+ rake (13.0.1)
13
+ regexp_parser (1.7.1)
14
+ rexml (3.2.4)
15
+ rspec (3.9.0)
16
+ rspec-core (~> 3.9.0)
17
+ rspec-expectations (~> 3.9.0)
18
+ rspec-mocks (~> 3.9.0)
19
+ rspec-core (3.9.2)
20
+ rspec-support (~> 3.9.3)
21
+ rspec-expectations (3.9.2)
22
+ diff-lcs (>= 1.2.0, < 2.0)
23
+ rspec-support (~> 3.9.0)
24
+ rspec-mocks (3.9.1)
25
+ diff-lcs (>= 1.2.0, < 2.0)
26
+ rspec-support (~> 3.9.0)
27
+ rspec-support (3.9.3)
28
+ rubocop (0.86.0)
29
+ parallel (~> 1.10)
30
+ parser (>= 2.7.0.1)
31
+ rainbow (>= 2.2.2, < 4.0)
32
+ regexp_parser (>= 1.7)
33
+ rexml
34
+ rubocop-ast (>= 0.0.3, < 1.0)
35
+ ruby-progressbar (~> 1.7)
36
+ unicode-display_width (>= 1.4.0, < 2.0)
37
+ rubocop-ast (0.0.3)
38
+ parser (>= 2.7.0.1)
39
+ ruby-progressbar (1.10.1)
40
+ simplecov (0.18.5)
41
+ docile (~> 1.1)
42
+ simplecov-html (~> 0.11)
43
+ simplecov-html (0.12.2)
44
+ unicode-display_width (1.7.0)
45
+
46
+ PLATFORMS
47
+ ruby
48
+
49
+ DEPENDENCIES
50
+ inheritance-helper
51
+ rake
52
+ rspec
53
+ rubocop
54
+ simplecov
55
+
56
+ BUNDLED WITH
57
+ 1.17.3
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'csv'
4
+
5
# Wraps +str+ in the ANSI escape sequences that turn bold on (ESC[1m) and
# reset all attributes afterwards (ESC[0m).
def bold_string(str)
  bold_on = "\033[1m"
  reset = "\033[0m"
  "#{bold_on}#{str}#{reset}"
end
8
+
9
+ USAGE = "Usage: #{bold_string('csv-change-eol')} <csv_file> <end of line character sequence in hex>"
10
+
11
# Prints +msg+ (in bold) to standard error, followed by the usage line and a
# short explanation of the hex end-of-line argument, then terminates the
# process with exit status 1.
def exit_on_error(msg)
  help_text = <<~STR
    Error: #{bold_string(msg)}

    #{USAGE}

    End of line example: '7C5E7C0A' is '|^|\\n'
    - 0A is new line
    - 0D is carriage return

    Goto: #{bold_string('http://www.asciitable.com/')} for help with the character sequence

  STR
  $stderr.print help_text
  exit 1
end
26
+
27
# --- argument handling -------------------------------------------------------
# Both arguments are mandatory; exit_on_error prints the usage and exits.
csv_file = ARGV.shift || exit_on_error('no csv file specified')
eol_sequence = ARGV.shift || exit_on_error('no EOL character sequence specified')

exit_on_error("file #{csv_file} not found") unless File.exist?(csv_file)
exit_on_error("not a HEX sequence (#{eol_sequence})") unless eol_sequence =~ /\A[0-9a-f]+\z/i
exit_on_error("incorrect number of characters in (#{eol_sequence}), should be even") unless eol_sequence.size.even?

# Turn the hex digits into the raw bytes they describe ('0A' => "\n").
eol_sequence = [eol_sequence].pack('H*')

# Output goes next to the input: foo.csv => foo.escaped-eol.csv, anything
# without a .csv extension simply gets '.escaped-eol' appended.
escaped_csv_file =
  if csv_file =~ /\.csv$/i
    csv_file.sub(/(\.csv)$/i, '.escaped-eol\1')
  else
    csv_file + '.escaped-eol'
  end

# --- rewrite -----------------------------------------------------------------
File.open(escaped_csv_file, 'wb') do |out|
  CSV.foreach(csv_file) do |row|
    # Array#to_csv terminates the record with $/; chomp removes exactly that
    # terminator. (rstrip would also eat trailing spaces/tabs that belong to
    # the last field and silently change the data.)
    line = row.to_csv.chomp
    line.concat(eol_sequence)
    out.write line
  end
end

puts escaped_csv_file
@@ -7,8 +7,7 @@ begin
7
7
  CSV.open(ARGV[0], 'rb').each { }
8
8
  rescue CSV::MalformedCSVError => e
9
9
  puts e.class.to_s + ': ' + e.message
10
- if e.message =~ /Missing or stray quote in line (\d+)/ ||
11
- e.message =~ /Unclosed quoted field on line (\d+)/
10
+ if e.message =~ /line (\d+)/
12
11
  lineno = $1.to_i
13
12
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
14
13
  puts "running #{cmd}"
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
4
+
3
5
  BYTE_ORDER_MARKS = {
4
6
  "\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
5
7
  "\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
@@ -25,11 +27,12 @@ def csv_parse_line(line)
25
27
  last_comma_pos = -1
26
28
  column = 1
27
29
 
28
- while pos = line.index(/([",])/, pos + 1)
30
+ while pos = line.index(/([",\n])/, pos + 1)
29
31
  case line[pos]
30
32
  when '"'
31
33
  if opened_quote
32
34
  if line[pos+1] == ',' ||
35
+ line[pos+1] == "\r" ||
33
36
  line[pos+1] == "\n" ||
34
37
  line[pos+1].nil?
35
38
  opened_quote = false
@@ -47,9 +50,22 @@ def csv_parse_line(line)
47
50
  # return columns
48
51
  end
49
52
  else
50
- opened_quote = true
53
+ if (last_comma_pos == -1 && pos != 0) ||
54
+ (last_comma_pos != -1 && line[pos-1] != ',')
55
+ col_end = line.index(/,/, pos + 1)
56
+ col_end ||= line.size - 1
57
+ # slice out the column value
58
+ val = line[last_comma_pos + 1, col_end - last_comma_pos - 1]
59
+ columns << [val, :stray_quote]
60
+ opened_quote = false
61
+ last_comma_pos = col_end
62
+ pos = col_end
63
+ else
64
+ opened_quote = true
65
+ end
51
66
  end
52
- when ','
67
+ when ',',
68
+ "\n"
53
69
  if ! opened_quote
54
70
  column += 1
55
71
  columns << [line[last_comma_pos + 1, pos - last_comma_pos - 1], :ok]
@@ -82,6 +98,22 @@ def parse_csv_row(file, lineno, number_of_lines)
82
98
  csv_parse_line(str)
83
99
  end
84
100
 
101
+ options = {
102
+ all_columns: false
103
+ }
104
+ OptionParser.new do |opts|
105
+ opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file> <line number> [<number of lines>]'
106
+
107
+ opts.on('-h', '--help', 'Prints this help') do
108
+ puts opts
109
+ exit
110
+ end
111
+
112
+ opts.on('-a', '--all', 'Display all columns') do
113
+ options[:all_columns] = true
114
+ end
115
+ end.parse!
116
+
85
117
  file = File.open(ARGV[0], 'rb')
86
118
  lineno = ARGV[1].to_i
87
119
  number_of_lines = (ARGV[2] || 1).to_i
@@ -96,7 +128,7 @@ file.close
96
128
  cnt = 0
97
129
  data.each do |k, (v, status)|
98
130
  cnt += 1
99
- next if empty_column?(v)
131
+ next if !options[:all_columns] && empty_column?(v)
100
132
  if status == :ok
101
133
  puts sprintf(' %-3d %s: %s', cnt, k, v)
102
134
  else
@@ -2,7 +2,8 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.1.3'
5
+ s.version = '0.2.0'
6
+ s.licenses = ['MIT']
6
7
  s.summary = 'CSV Utils'
7
8
  s.description = 'Tools for debugging malformed CSV files'
8
9
  s.authors = ['Doug Youch']
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
11
12
  s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
12
13
  s.bindir = 'bin'
13
14
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+
16
+ s.add_runtime_dependency 'inheritance-helper'
14
17
  end
@@ -0,0 +1,8 @@
1
+ require 'csv'
2
+
3
# Namespace for the CSV helper classes. Each constant is registered for
# autoloading, so its file is only required when the constant is first used.
module CSVUtils
  {
    CSVOptions: 'csv_utils/csv_options',
    CSVReport: 'csv_utils/csv_report',
    CSVRow: 'csv_utils/csv_row'
  }.each do |const_name, path|
    autoload const_name, path
  end
end
@@ -0,0 +1,87 @@
1
# Auto-detects a CSV file's options (column/row separator, byte order mark,
# character encoding and number of columns) by inspecting its first line.
module CSVUtils
  class CSVOptions
    # Byte order marks and the encoding family they announce.
    # This list is from https://en.wikipedia.org/wiki/Byte_order_mark
    BYTE_ORDER_MARKS = {
      "\xEF\xBB\xBF".b => 'UTF-8',
      "\xFE\xFF".b => 'UTF-16',
      "\xFF\xFE".b => 'UTF-16',
      "\x00\x00\xFE\xFF".b => 'UTF-32',
      "\xFF\xFE\x00\x00".b => 'UTF-32'
    }.freeze

    # Candidate column separators, tried in this order.
    COL_SEPARATORS = ["\x02", "\t", '|', ','].freeze

    # Candidate row separators; "\r\n" must be tried before its two halves.
    ROW_SEPARATORS = ["\r\n", "\n", "\r"].freeze

    attr_reader :columns,
                :byte_order_mark,
                :encoding,
                :col_separator,
                :row_separator

    # @param io [String, #readline] a file path, or an object whose
    #   +readline+ returns the first line of the CSV data
    # @raise [EOFError] when there is no line to read
    def initialize(io)
      line = io.is_a?(String) ? File.open(io, 'rb', &:readline) : io.readline

      @col_separator = auto_detect_col_sep(line)
      @row_separator = auto_detect_row_sep(line)
      @byte_order_mark = get_byte_order_mark(line)
      @encoding = get_character_encoding(@byte_order_mark)
      @columns = get_number_of_columns(line) if @col_separator
    end

    # @return [Boolean] true when both a column and a row separator were found
    def valid?
      !(@col_separator.nil? || @row_separator.nil?)
    end

    # @return [String, nil] the first entry of COL_SEPARATORS present in +line+
    def auto_detect_col_sep(line)
      COL_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # @return [String, nil] the first entry of ROW_SEPARATORS present in +line+
    def auto_detect_row_sep(line)
      ROW_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # Splits the first record of +line+ into its column headers, without the
    # byte order mark and without the row terminator.
    #
    # @return [Array<String>]
    def get_headers(line)
      # Keep only the first record: this drops the terminator and, for
      # "\r"-terminated files (where a line read up to "\n" can hold several
      # records), everything after the header row.
      first_row = row_separator ? line.split(row_separator, 2).first.to_s : line
      # Limit -1 keeps trailing empty columns, which split drops by default.
      headers = first_row.split(col_separator, -1)
      headers[0] = strip_byte_order_marks(headers[0])
      headers
    end

    # @return [Integer] number of columns in the header row of +line+
    def get_number_of_columns(line)
      get_headers(line).size
    end

    # @return [String, nil] the key of BYTE_ORDER_MARKS that +line+ starts with
    def get_byte_order_mark(line)
      # Compare raw bytes so the check works whatever encoding +line+ carries.
      bytes = line.b
      # Longest marks first: the UTF-16 LE mark is a prefix of the UTF-32 LE one.
      BYTE_ORDER_MARKS.keys
                      .sort_by { |bom| -bom.bytesize }
                      .find { |bom| bytes.start_with?(bom) }
    end

    # @return [String] encoding name for +bom+, 'UTF-8' when there is no mark
    def get_character_encoding(bom)
      BYTE_ORDER_MARKS[bom] || 'UTF-8'
    end

    # Removes the detected byte order mark from the start of +header+.
    #
    # @return [String, nil] +header+ without the mark (unchanged when no mark
    #   was detected or +header+ does not start with it)
    def strip_byte_order_marks(header)
      bom = @byte_order_mark
      return header unless bom && header && header.b.start_with?(bom)

      # byteslice keeps the header's own encoding and avoids comparing
      # strings of incompatible encodings.
      header.byteslice(bom.bytesize..-1)
    end
  end
end
@@ -0,0 +1,40 @@
1
# Builds a CSV file from rows. Rows may be plain arrays or objects that respond
# to +to_a+ (and +csv_headers+ for the header line).
module CSVUtils
  class CSVReport
    attr_reader :csv,
                :must_close

    # @param csv [String, #<<] a file path to create, or an already opened
    #   CSV-like object that rows are appended to with +<<+
    # @param csv_options [Hash] options for CSV.open when +csv+ is a path;
    #   the optional +:mode+ entry is the file mode (default 'wb')
    # @yieldparam report [CSVReport] when a block is given the report is
    #   generated immediately and a file opened here is closed afterwards
    def initialize(csv, csv_options = {}, &block)
      if csv.is_a?(String)
        # Work on a copy so the caller's hash is left untouched.
        open_options = csv_options.dup
        mode = open_options.delete(:mode) || 'wb'
        @must_close = true
        # CSV.open takes its options as keyword arguments.
        @csv = CSV.open(csv, mode, **open_options)
      else
        @must_close = false
        @csv = csv
      end

      generate(&block) if block
    end

    # Yields the report, then closes the underlying file if this object
    # opened it -- also when the block raises.
    #
    # @return [nil]
    def generate
      yield self
      nil
    ensure
      @csv.close if @must_close
    end

    # Appends one row.
    #
    # @param csv_row [Array, #to_a]
    def append(csv_row)
      @csv << (csv_row.is_a?(Array) ? csv_row : csv_row.to_a)
    end
    alias << append

    # Appends the header line.
    #
    # @param csv_row [Array, #csv_headers] the headers themselves, or an
    #   object that provides them
    def add_headers(csv_row)
      append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'inheritance-helper'
2
+
3
module CSVUtils
  # Mixin that lets a class declare, with +csv_column+, how its instances are
  # turned into a CSV header line and a CSV row.
  module CSVRow
    # Hook run on +include+: gives the including class the
    # InheritanceHelper class-level helpers and the +csv_column+ DSL.
    def self.included(base)
      base.extend InheritanceHelper::Methods
      base.extend ClassMethods
    end

    module ClassMethods
      # Column definitions keyed by header; starts out empty and is extended
      # through +add_value_to_class_method+ by each +csv_column+ call.
      def csv_columns
        {}
      end

      # Declares one CSV column.
      #
      # @param header [Symbol, String] column key; also the default header
      #   text and the default method called to obtain the value
      # @param options [Hash] optional +:header+ (text), +:method+ (method
      #   name) or +:proc+ (callable evaluated in the instance)
      # @yield evaluated in the context of the instance to produce the value;
      #   takes precedence over +:proc+ and +:method+
      def csv_column(header, options = {}, &block)
        # Copy first: writing defaults into the caller's hash would leak this
        # column's :header/:method into the next column declared with it.
        column_options = options.dup
        column_options[:header] ||= header.to_s

        if block
          column_options[:proc] = block
        elsif column_options[:proc].nil?
          column_options[:method] ||= header
        end

        add_value_to_class_method(:csv_columns, header => column_options)
      end
    end

    # @return [Array] header text of every declared column, in declaration order
    def csv_headers
      self.class.csv_columns.values.map { |column_options| csv_column_header(column_options) }
    end

    # @return [Array] this object's value for every declared column
    def csv_row
      self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
    end
    alias_method :to_a, :csv_row

    private

    def csv_column_header(column_options)
      column_options[:header]
    end

    # A +:proc+ is evaluated with the instance as +self+; otherwise the
    # configured method is invoked on the instance.
    def csv_column_value(column_options)
      if column_options[:proc]
        instance_eval(&column_options[:proc])
      else
        send(column_options[:method])
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Starts an IRB session with this checkout's lib directory on the load path
# and the gem already required.
lib_dir = File.expand_path('../lib', __dir__)
$LOAD_PATH.push(lib_dir)

require 'csv-utils'
require 'irb'

IRB.start(__FILE__)
metadata CHANGED
@@ -1,33 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-19 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: inheritance-helper
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  description: Tools for debugging malformed CSV files
14
28
  email: dougyouch@gmail.com
15
29
  executables:
30
+ - csv-change-eol
16
31
  - csv-find-error
17
32
  - csv-readline
18
33
  extensions: []
19
34
  extra_rdoc_files: []
20
35
  files:
21
36
  - ".gitignore"
37
+ - ".ruby-gemset"
38
+ - ".ruby-version"
39
+ - Gemfile
40
+ - Gemfile.lock
22
41
  - LICENSE
23
42
  - README.md
43
+ - bin/csv-change-eol
24
44
  - bin/csv-find-error
25
45
  - bin/csv-readline
26
46
  - csv-utils.gemspec
47
+ - lib/csv-utils.rb
48
+ - lib/csv_utils/csv_options.rb
49
+ - lib/csv_utils/csv_report.rb
50
+ - lib/csv_utils/csv_row.rb
51
+ - script/console
27
52
  homepage: https://github.com/dougyouch/csv-utils
28
- licenses: []
53
+ licenses:
54
+ - MIT
29
55
  metadata: {}
30
- post_install_message:
56
+ post_install_message:
31
57
  rdoc_options: []
32
58
  require_paths:
33
59
  - lib
@@ -42,9 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
68
  - !ruby/object:Gem::Version
43
69
  version: '0'
44
70
  requirements: []
45
- rubyforge_project:
46
- rubygems_version: 2.5.2.3
47
- signing_key:
71
+ rubygems_version: 3.0.8
72
+ signing_key:
48
73
  specification_version: 4
49
74
  summary: CSV Utils
50
75
  test_files: []