csv-utils 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d2ec5906d30d2be8c744e4311fc0c7af0f58a5f4
4
- data.tar.gz: bcd816369f3e2698e30b372e3dc9c5a74a30b02f
2
+ SHA256:
3
+ metadata.gz: ecb75f60c8e9b9db4cc3eb0e4ca3a0ac53aad67726ed995b7e8c341cd0dc76a3
4
+ data.tar.gz: 5138b5cc82eec0b7667c9e3435c2662bbfa1de51e469582372a158b943e57d7f
5
5
  SHA512:
6
- metadata.gz: 44b66f060cbba4c1c2d48d62e40f6b17d437334633199e4de22988100dbd74b4bba92b9c03247bd284c562bf57b1ffdc0823e05d4591781f2cb01882fdaecc55
7
- data.tar.gz: 14aba3fd327f3f03d9e82a3a189791aa1cc84985cd78fbb752dac8d03373868af92919a8e87e900e6e4765c3e41291a2203515cc4fc365c605fd4639df60fd59
6
+ metadata.gz: 1a7d685b0db28805833596b32793fca968c6a5a1f57346223b487579622423d0151a122253c69806e377015fe0cf9cf02381bafbee37cb0ba54fa290a857c1cc
7
+ data.tar.gz: 7c572f9e7c74d626084612afa188bb15b036377bbac16ed5374d7fcff300e58fab8100a77729c2fa428e4294ac6b3b643feb00b2e87e7de78883c8548193de54
@@ -0,0 +1 @@
1
+ csv-utils
@@ -0,0 +1 @@
1
+ 2.6.3
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# frozen_string_literal: true

# Fetch gems over TLS; a plain http source can be tampered with in transit.
source 'https://rubygems.org'

# Runtime dependency of the library code.
gem 'inheritance-helper'

# Tooling used while developing the gem.
group :development do
  gem 'rake'
  gem 'rubocop'
end

# Test framework and coverage reporting.
group :spec do
  gem 'rspec'
  gem 'simplecov'
end
@@ -0,0 +1,57 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ ast (2.4.1)
5
+ diff-lcs (1.3)
6
+ docile (1.3.2)
7
+ inheritance-helper (0.1.5)
8
+ parallel (1.19.2)
9
+ parser (2.7.1.4)
10
+ ast (~> 2.4.1)
11
+ rainbow (3.0.0)
12
+ rake (13.0.1)
13
+ regexp_parser (1.7.1)
14
+ rexml (3.2.4)
15
+ rspec (3.9.0)
16
+ rspec-core (~> 3.9.0)
17
+ rspec-expectations (~> 3.9.0)
18
+ rspec-mocks (~> 3.9.0)
19
+ rspec-core (3.9.2)
20
+ rspec-support (~> 3.9.3)
21
+ rspec-expectations (3.9.2)
22
+ diff-lcs (>= 1.2.0, < 2.0)
23
+ rspec-support (~> 3.9.0)
24
+ rspec-mocks (3.9.1)
25
+ diff-lcs (>= 1.2.0, < 2.0)
26
+ rspec-support (~> 3.9.0)
27
+ rspec-support (3.9.3)
28
+ rubocop (0.86.0)
29
+ parallel (~> 1.10)
30
+ parser (>= 2.7.0.1)
31
+ rainbow (>= 2.2.2, < 4.0)
32
+ regexp_parser (>= 1.7)
33
+ rexml
34
+ rubocop-ast (>= 0.0.3, < 1.0)
35
+ ruby-progressbar (~> 1.7)
36
+ unicode-display_width (>= 1.4.0, < 2.0)
37
+ rubocop-ast (0.0.3)
38
+ parser (>= 2.7.0.1)
39
+ ruby-progressbar (1.10.1)
40
+ simplecov (0.18.5)
41
+ docile (~> 1.1)
42
+ simplecov-html (~> 0.11)
43
+ simplecov-html (0.12.2)
44
+ unicode-display_width (1.7.0)
45
+
46
+ PLATFORMS
47
+ ruby
48
+
49
+ DEPENDENCIES
50
+ inheritance-helper
51
+ rake
52
+ rspec
53
+ rubocop
54
+ simplecov
55
+
56
+ BUNDLED WITH
57
+ 1.17.3
@@ -0,0 +1,54 @@
1
#!/usr/bin/env ruby

# Rewrites a CSV file so that every row ends with a caller-supplied byte
# sequence (given in hex) and prints the path of the file it wrote.
#
# Usage: csv-change-eol <csv_file> <end of line character sequence in hex>

require 'csv'

# Wraps +str+ in ANSI escape codes so a terminal renders it in bold.
def bold_string(str)
  "\033[1m#{str}\033[0m"
end

USAGE = "Usage: #{bold_string('csv-change-eol')} <csv_file> <end of line character sequence in hex>"

# Prints +msg+ followed by the usage help to stderr, then exits with status 1.
def exit_on_error(msg)
  $stderr.print <<STR
Error: #{bold_string(msg)}

#{USAGE}

End of line example: '7C5E7C0A' is '|^|\\n'
- 0A is new line
- 0D is carriage return

Goto: #{bold_string('http://www.asciitable.com/')} for help with the character sequence

STR
  exit 1
end

csv_file = ARGV.shift || exit_on_error('no csv file specified')
eol_sequence = ARGV.shift || exit_on_error('no EOL character sequence specified')

exit_on_error("file #{csv_file} not found") unless File.exist?(csv_file)
exit_on_error("not a HEX sequence (#{eol_sequence})") unless eol_sequence =~ /\A[0-9a-f]+\z/i
exit_on_error("incorrect number of characters in (#{eol_sequence}), should be even") unless eol_sequence.size.even?

# Decode the hex digits into the raw bytes to write after every row.
eol_sequence = [eol_sequence].pack('H*')

# foo.csv -> foo.escaped-eol.csv; any other name just gets the suffix appended.
escaped_csv_file =
  if csv_file =~ /\.csv$/i
    csv_file.sub(/(\.csv)$/i, '.escaped-eol\1')
  else
    csv_file + '.escaped-eol'
  end

File.open(escaped_csv_file, 'wb') do |out|
  CSV.foreach(csv_file) do |row|
    # chomp drops only the row terminator that to_csv appended; rstrip would
    # also eat trailing whitespace that belongs to the last field.
    out.write(row.to_csv.chomp)
    # Written separately so a non-ASCII row and a high-byte EOL sequence are
    # never concatenated into one string (which raises on incompatible encodings).
    out.write(eol_sequence)
  end
end

puts escaped_csv_file
@@ -7,8 +7,7 @@ begin
7
7
  CSV.open(ARGV[0], 'rb').each { }
8
8
  rescue CSV::MalformedCSVError => e
9
9
  puts e.class.to_s + ': ' + e.message
10
- if e.message =~ /Missing or stray quote in line (\d+)/ ||
11
- e.message =~ /Unclosed quoted field on line (\d+)/
10
+ if e.message =~ /line (\d+)/
12
11
  lineno = $1.to_i
13
12
  cmd = "csv-readline #{Shellwords.escape(ARGV[0])} #{lineno}"
14
13
  puts "running #{cmd}"
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
4
+
3
5
  BYTE_ORDER_MARKS = {
4
6
  "\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
5
7
  "\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
@@ -25,11 +27,12 @@ def csv_parse_line(line)
25
27
  last_comma_pos = -1
26
28
  column = 1
27
29
 
28
- while pos = line.index(/([",])/, pos + 1)
30
+ while pos = line.index(/([",\n])/, pos + 1)
29
31
  case line[pos]
30
32
  when '"'
31
33
  if opened_quote
32
34
  if line[pos+1] == ',' ||
35
+ line[pos+1] == "\r" ||
33
36
  line[pos+1] == "\n" ||
34
37
  line[pos+1].nil?
35
38
  opened_quote = false
@@ -47,9 +50,22 @@ def csv_parse_line(line)
47
50
  # return columns
48
51
  end
49
52
  else
50
- opened_quote = true
53
+ if (last_comma_pos == -1 && pos != 0) ||
54
+ (last_comma_pos != -1 && line[pos-1] != ',')
55
+ col_end = line.index(/,/, pos + 1)
56
+ col_end ||= line.size - 1
57
+ # slice out the column value
58
+ val = line[last_comma_pos + 1, col_end - last_comma_pos - 1]
59
+ columns << [val, :stray_quote]
60
+ opened_quote = false
61
+ last_comma_pos = col_end
62
+ pos = col_end
63
+ else
64
+ opened_quote = true
65
+ end
51
66
  end
52
- when ','
67
+ when ',',
68
+ "\n"
53
69
  if ! opened_quote
54
70
  column += 1
55
71
  columns << [line[last_comma_pos + 1, pos - last_comma_pos - 1], :ok]
@@ -82,6 +98,22 @@ def parse_csv_row(file, lineno, number_of_lines)
82
98
  csv_parse_line(str)
83
99
  end
84
100
 
101
+ options = {
102
+ all_columns: false
103
+ }
104
+ OptionParser.new do |opts|
105
+ opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file> <line number> [<number of lines>]'
106
+
107
+ opts.on('-h', '--help', 'Prints this help') do
108
+ puts opts
109
+ exit
110
+ end
111
+
112
+ opts.on('-a', '--all', 'Display all columns') do
113
+ options[:all_columns] = true
114
+ end
115
+ end.parse!
116
+
85
117
  file = File.open(ARGV[0], 'rb')
86
118
  lineno = ARGV[1].to_i
87
119
  number_of_lines = (ARGV[2] || 1).to_i
@@ -96,7 +128,7 @@ file.close
96
128
  cnt = 0
97
129
  data.each do |k, (v, status)|
98
130
  cnt += 1
99
- next if empty_column?(v)
131
+ next if !options[:all_columns] && empty_column?(v)
100
132
  if status == :ok
101
133
  puts sprintf(' %-3d %s: %s', cnt, k, v)
102
134
  else
@@ -2,7 +2,8 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.1.3'
5
+ s.version = '0.2.0'
6
+ s.licenses = ['MIT']
6
7
  s.summary = 'CSV Utils'
7
8
  s.description = 'Tools for debugging malformed CSV files'
8
9
  s.authors = ['Doug Youch']
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
11
12
  s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
12
13
  s.bindir = 'bin'
13
14
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+
16
+ s.add_runtime_dependency 'inheritance-helper'
14
17
  end
@@ -0,0 +1,8 @@
1
+ require 'csv'
2
+
3
# Namespace for the CSV helper classes. Each constant is registered for
# autoload, so its file is only required the first time it is referenced.
module CSVUtils
  {
    CSVOptions: 'csv_utils/csv_options',
    CSVReport: 'csv_utils/csv_report',
    CSVRow: 'csv_utils/csv_row'
  }.each do |const_name, path|
    autoload const_name, path
  end
end
@@ -0,0 +1,87 @@
1
# Auto-detects a CSV file's options (column/row separators, byte order mark,
# encoding and number of columns) by inspecting its first line.
module CSVUtils
  class CSVOptions
    # this list is from https://en.wikipedia.org/wiki/Byte_order_mark
    BYTE_ORDER_MARKS = {
      "\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
      "\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
      "\xFF\xFE".force_encoding('ASCII-8BIT') => 'UTF-16',
      "\x00\x00\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-32',
      "\xFF\xFE\x00\x00".force_encoding('ASCII-8BIT') => 'UTF-32'
    }.freeze

    # Candidate column separators, tried in this order.
    COL_SEPARATORS = [
      "\x02",
      "\t",
      '|',
      ','
    ].freeze

    # Candidate row separators; "\r\n" is listed first so it wins over its parts.
    ROW_SEPARATORS = [
      "\r\n",
      "\n",
      "\r"
    ].freeze

    attr_reader :columns,
                :byte_order_mark,
                :encoding,
                :col_separator,
                :row_separator

    # @param io [String, #readline] a file path (opened in binary mode) or an
    #   object that responds to +readline+; only the first line is read.
    def initialize(io)
      line =
        if io.is_a?(String)
          File.open(io, 'rb', &:readline)
        else
          io.readline
        end

      @col_separator = auto_detect_col_sep(line)
      @row_separator = auto_detect_row_sep(line)
      # The BOM has to be known before the headers are parsed so that it can
      # be stripped from the first header.
      @byte_order_mark = get_byte_order_mark(line)
      @encoding = get_character_encoding(@byte_order_mark)
      @columns = get_number_of_columns(line) if @col_separator
    end

    # @return [Boolean] true when both a column and a row separator were found.
    def valid?
      !(@col_separator.nil? || @row_separator.nil?)
    end

    # @return [String, nil] the first entry of COL_SEPARATORS present in +line+.
    def auto_detect_col_sep(line)
      COL_SEPARATORS.detect { |sep| line.include?(sep) }
    end

    # @return [String, nil] the first entry of ROW_SEPARATORS present in +line+.
    def auto_detect_row_sep(line)
      ROW_SEPARATORS.detect { |sep| line.include?(sep) }
    end

    # Splits +line+ on the detected column separator. The trailing row
    # separator is removed first so it does not end up in the last header,
    # and a limit of -1 keeps trailing empty columns. A leading byte order
    # mark is stripped from the first header.
    #
    # @return [Array<String>]
    def get_headers(line)
      headers = line.chomp.split(col_separator, -1)
      headers[0] = strip_byte_order_marks(headers[0])
      headers
    end

    # @return [Integer] number of columns in +line+.
    def get_number_of_columns(line)
      get_headers(line).size
    end

    # Returns the byte order mark +line+ starts with, or nil.
    #
    # The comparison is done on raw bytes so it works whatever encoding
    # +line+ is tagged with. Longer marks are tried first because the
    # UTF-16 LE mark (FF FE) is a prefix of the UTF-32 LE mark (FF FE 00 00).
    def get_byte_order_mark(line)
      bytes = line.b
      BYTE_ORDER_MARKS.keys
                      .sort_by { |bom| -bom.bytesize }
                      .detect { |bom| bytes.start_with?(bom) }
    end

    # @return [String] encoding name for +bom+; 'UTF-8' when there is no BOM.
    def get_character_encoding(bom)
      BYTE_ORDER_MARKS[bom] || 'UTF-8'
    end

    # Removes the detected byte order mark from the start of +header+.
    # Returns +header+ untouched when no BOM was detected or +header+ does
    # not start with it.
    def strip_byte_order_marks(header)
      bom = @byte_order_mark
      return header unless bom && header && header.b.start_with?(bom)

      # byteslice keeps the header's own encoding tag.
      header.byteslice(bom.bytesize, header.bytesize - bom.bytesize)
    end
  end
end
@@ -0,0 +1,40 @@
1
# Builds a csv file from csv rows
module CSVUtils
  class CSVReport
    attr_reader :csv,
                :must_close

    # @param csv [String, #<<] a file path to open for writing, or an
    #   already-open object that accepts rows via +<<+.
    # @param csv_options [Hash] options forwarded to CSV.open when +csv+ is a
    #   path; the :mode key selects the file mode (default 'wb').
    # @yield [report] when a block is given the report is generated at once
    #   (see #generate).
    def initialize(csv, csv_options = {}, &block)
      @csv =
        if csv.is_a?(String)
          @must_close = true
          # Work on a copy so the caller's hash is not modified.
          open_options = csv_options.dup
          mode = open_options.delete(:mode) || 'wb'
          # CSV.open takes its options as keywords; a positional hash is
          # rejected on Ruby 3.
          CSV.open(csv, mode, **open_options)
        else
          @must_close = false
          csv
        end

      generate(&block) if block
    end

    # Yields the report, then closes the underlying CSV if this object opened
    # it. The close runs even when the block raises.
    #
    # @return [nil]
    def generate
      yield self
      nil
    ensure
      @csv.close if @must_close
    end

    # Appends one row.
    #
    # @param csv_row [Array, #to_a] an array of values, or an object that
    #   converts to one via +to_a+.
    def append(csv_row)
      @csv << (csv_row.is_a?(Array) ? csv_row : csv_row.to_a)
    end
    alias << append

    # Appends a header row.
    #
    # @param csv_row [Array, #csv_headers] the headers themselves, or an
    #   object that provides them via +csv_headers+.
    def add_headers(csv_row)
      append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'inheritance-helper'
2
+
3
module CSVUtils
  # Mixin that lets a class declare its CSV columns with +csv_column+ and
  # then produce a header row (#csv_headers) and a value row (#csv_row / #to_a).
  module CSVRow
    def self.included(base)
      base.extend InheritanceHelper::Methods
      base.extend ClassMethods
    end

    module ClassMethods
      # Column definitions keyed by header. Starts out empty; entries are
      # registered through #csv_column.
      def csv_columns
        {}
      end

      # Declares a column.
      #
      # @param header [Symbol, String] column key; also the default header
      #   text and the default method called for the value.
      # @param options [Hash] may carry :header (header text), :proc
      #   (evaluated on the instance for the value) or :method (method name
      #   sent to the instance for the value).
      # @yield optional block used as the column's :proc.
      def csv_column(header, options = {}, &block)
        # Copy first: filling in defaults on the caller's hash would leak the
        # first column's :header/:method into any later column declared with
        # the same hash object.
        options = options.dup
        options[:header] ||= header.to_s

        if block
          options[:proc] = block
        elsif options[:proc].nil?
          options[:method] ||= header
        end

        # NOTE(review): add_value_to_class_method comes from
        # InheritanceHelper::Methods; presumably it makes csv_columns return
        # the existing entries plus this one - confirm against that gem.
        add_value_to_class_method(:csv_columns, header => options)
      end
    end

    # @return [Array] header text of every declared column.
    def csv_headers
      self.class.csv_columns.values.map { |column_options| csv_column_header(column_options) }
    end

    # @return [Array] this object's value for every declared column.
    def csv_row
      self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
    end
    alias_method :to_a, :csv_row

    private

    def csv_column_header(column_options)
      column_options[:header]
    end

    # A :proc is evaluated in the context of the instance; otherwise the
    # configured :method is sent to the instance.
    def csv_column_value(column_options)
      if column_options[:proc]
        instance_eval(&column_options[:proc])
      else
        send(column_options[:method])
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Opens an IRB session with the csv-utils library loaded from this checkout.
lib_dir = File.expand_path('../lib', __dir__)
$LOAD_PATH.push(lib_dir)

require 'csv-utils'
require 'irb'

IRB.start(__FILE__)
metadata CHANGED
@@ -1,33 +1,59 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-19 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: inheritance-helper
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  description: Tools for debugging malformed CSV files
14
28
  email: dougyouch@gmail.com
15
29
  executables:
30
+ - csv-change-eol
16
31
  - csv-find-error
17
32
  - csv-readline
18
33
  extensions: []
19
34
  extra_rdoc_files: []
20
35
  files:
21
36
  - ".gitignore"
37
+ - ".ruby-gemset"
38
+ - ".ruby-version"
39
+ - Gemfile
40
+ - Gemfile.lock
22
41
  - LICENSE
23
42
  - README.md
43
+ - bin/csv-change-eol
24
44
  - bin/csv-find-error
25
45
  - bin/csv-readline
26
46
  - csv-utils.gemspec
47
+ - lib/csv-utils.rb
48
+ - lib/csv_utils/csv_options.rb
49
+ - lib/csv_utils/csv_report.rb
50
+ - lib/csv_utils/csv_row.rb
51
+ - script/console
27
52
  homepage: https://github.com/dougyouch/csv-utils
28
- licenses: []
53
+ licenses:
54
+ - MIT
29
55
  metadata: {}
30
- post_install_message:
56
+ post_install_message:
31
57
  rdoc_options: []
32
58
  require_paths:
33
59
  - lib
@@ -42,9 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
68
  - !ruby/object:Gem::Version
43
69
  version: '0'
44
70
  requirements: []
45
- rubyforge_project:
46
- rubygems_version: 2.5.2.3
47
- signing_key:
71
+ rubygems_version: 3.0.8
72
+ signing_key:
48
73
  specification_version: 4
49
74
  summary: CSV Utils
50
75
  test_files: []