csv-utils 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9680eb915003bdf0ea797d2aa736ce5069c4cbbf2f5cfe9f6b0c504701b178cd
4
- data.tar.gz: 211e3c84c56aa414fa57ca9b6b15e227f3f4089b2e9bf0a8a4e4d19def5d1a81
3
+ metadata.gz: '083044bd714b955ff9d5f6a44cf6d4cb3344cf7f649b105d81a94b3ddb1c9425'
4
+ data.tar.gz: 5fa1cd9acacf10c275a23176e36722e189f8e6d8a85e1bfc7faf18eb45a1ca31
5
5
  SHA512:
6
- metadata.gz: ebb5a02c8ad2f63c512fe9b0e03e3dadc00d6fe09c96260e4da28a2986efc7bc6d9414d7e03893df39bf9a83f3d25a92da7b6e81a47225fd466d9bf9acb09cc0
7
- data.tar.gz: 470637ff20a19c7674604a0afca3654f41c9d8bed496af1198e2dc0fe6c8aa5e9ccacf32e27d9c3cacba0e77914b585463a3644beff9d4eec54e0aaf18ebb9a1
6
+ metadata.gz: de36fb6c80a68b33c92f3c943a665c419ac1287f5b4b2901c1a60f45964d4b13c2199f53e8dd779b2947d6c86439fa1b1b781cc5ba2225656b2d8b50f690e4cc
7
+ data.tar.gz: 0a6b5a9301f2386c2ad5bf3009ca77f949a92510ebb606844a1e6bf9fcc573f524966e68d1d8cec7db9778be3c93fb58a006fee9008e531c53a2c5391af6c0c3
@@ -0,0 +1 @@
1
+ csv-utils
@@ -0,0 +1 @@
1
+ 2.6.3
data/Gemfile ADDED
@@ -0,0 +1,15 @@
1
# frozen_string_literal: true

# Fetch gems over TLS: a plain-http source lets anyone on the network path
# tamper with the packages that get installed.
source 'https://rubygems.org'

# Runtime dependency (also declared in the gemspec).
gem 'inheritance-helper'

# Tooling used while developing the gem.
group :development do
  gem 'rake'
  gem 'rubocop'
end

# Test-suite dependencies.
group :spec do
  gem 'rspec'
  gem 'simplecov'
end
@@ -0,0 +1,57 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ ast (2.4.1)
5
+ diff-lcs (1.3)
6
+ docile (1.3.2)
7
+ inheritance-helper (0.1.5)
8
+ parallel (1.19.2)
9
+ parser (2.7.1.4)
10
+ ast (~> 2.4.1)
11
+ rainbow (3.0.0)
12
+ rake (13.0.1)
13
+ regexp_parser (1.7.1)
14
+ rexml (3.2.4)
15
+ rspec (3.9.0)
16
+ rspec-core (~> 3.9.0)
17
+ rspec-expectations (~> 3.9.0)
18
+ rspec-mocks (~> 3.9.0)
19
+ rspec-core (3.9.2)
20
+ rspec-support (~> 3.9.3)
21
+ rspec-expectations (3.9.2)
22
+ diff-lcs (>= 1.2.0, < 2.0)
23
+ rspec-support (~> 3.9.0)
24
+ rspec-mocks (3.9.1)
25
+ diff-lcs (>= 1.2.0, < 2.0)
26
+ rspec-support (~> 3.9.0)
27
+ rspec-support (3.9.3)
28
+ rubocop (0.86.0)
29
+ parallel (~> 1.10)
30
+ parser (>= 2.7.0.1)
31
+ rainbow (>= 2.2.2, < 4.0)
32
+ regexp_parser (>= 1.7)
33
+ rexml
34
+ rubocop-ast (>= 0.0.3, < 1.0)
35
+ ruby-progressbar (~> 1.7)
36
+ unicode-display_width (>= 1.4.0, < 2.0)
37
+ rubocop-ast (0.0.3)
38
+ parser (>= 2.7.0.1)
39
+ ruby-progressbar (1.10.1)
40
+ simplecov (0.18.5)
41
+ docile (~> 1.1)
42
+ simplecov-html (~> 0.11)
43
+ simplecov-html (0.12.2)
44
+ unicode-display_width (1.7.0)
45
+
46
+ PLATFORMS
47
+ ruby
48
+
49
+ DEPENDENCIES
50
+ inheritance-helper
51
+ rake
52
+ rspec
53
+ rubocop
54
+ simplecov
55
+
56
+ BUNDLED WITH
57
+ 1.17.3
@@ -0,0 +1,54 @@
1
#!/usr/bin/env ruby

# Rewrites a CSV file so that every row ends with a caller-supplied
# end-of-line byte sequence (given on the command line as hex digits).
# The result is written next to the input file and its path is printed.

require 'csv'

# Wraps +str+ in the ANSI escape sequences that switch bold on and off.
def bold_string(str)
  "\033[1m#{str}\033[0m"
end

USAGE = "Usage: #{bold_string('csv-change-eol')} <csv_file> <end of line character sequence in hex>"

# Prints +msg+ followed by the usage help to stderr, then exits with status 1.
def exit_on_error(msg)
  $stderr.print <<STR
Error: #{bold_string(msg)}

#{USAGE}

End of line example: '7C5E7C0A' is '|^|\\n'
- 0A is new line
- 0D is carriage return

Goto: #{bold_string('http://www.asciitable.com/')} for help with the character sequence

STR
  exit 1
end

csv_file = ARGV.shift || exit_on_error('no csv file specified')
eol_sequence = ARGV.shift || exit_on_error('no EOL character sequence specified')

exit_on_error("file #{csv_file} not found") unless File.exist?(csv_file)
exit_on_error("not a HEX sequece (#{eol_sequence})") unless eol_sequence =~ /\A[0-9a-f]+\z/i
exit_on_error("incorrect number of characters in (#{eol_sequence}), should be even") unless eol_sequence.size.even?

# Turn the hex digits into the raw bytes they describe, e.g. '0D0A' => "\r\n".
eol_sequence = [eol_sequence].pack('H*')

# foo.csv => foo.escaped-eol.csv; any other name just gets the suffix appended.
escaped_csv_file =
  if csv_file =~ /\.csv$/i
    csv_file.sub(/(\.csv)$/i, '.escaped-eol\1')
  else
    csv_file + '.escaped-eol'
  end

File.open(escaped_csv_file, 'wb') do |out|
  CSV.foreach(csv_file) do |row|
    # Drop only the row terminator added by to_csv. Stripping all trailing
    # whitespace would silently alter a last column that ends in (or consists
    # of) spaces or tabs, because such fields are written unquoted.
    line = row.to_csv.chomp
    line.concat(eol_sequence)
    out.write line
  end
end

puts escaped_csv_file
@@ -1,5 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'optparse'
4
+
3
5
  BYTE_ORDER_MARKS = {
4
6
  "\xEF\xBB\xBF".force_encoding('ASCII-8BIT') => 'UTF-8',
5
7
  "\xFE\xFF".force_encoding('ASCII-8BIT') => 'UTF-16',
@@ -25,11 +27,12 @@ def csv_parse_line(line)
25
27
  last_comma_pos = -1
26
28
  column = 1
27
29
 
28
- while pos = line.index(/([",])/, pos + 1)
30
+ while pos = line.index(/([",\n])/, pos + 1)
29
31
  case line[pos]
30
32
  when '"'
31
33
  if opened_quote
32
34
  if line[pos+1] == ',' ||
35
+ line[pos+1] == "\r" ||
33
36
  line[pos+1] == "\n" ||
34
37
  line[pos+1].nil?
35
38
  opened_quote = false
@@ -61,7 +64,8 @@ def csv_parse_line(line)
61
64
  opened_quote = true
62
65
  end
63
66
  end
64
- when ','
67
+ when ',',
68
+ "\n"
65
69
  if ! opened_quote
66
70
  column += 1
67
71
  columns << [line[last_comma_pos + 1, pos - last_comma_pos - 1], :ok]
@@ -94,6 +98,22 @@ def parse_csv_row(file, lineno, number_of_lines)
94
98
  csv_parse_line(str)
95
99
  end
96
100
 
101
+ options = {
102
+ all_columns: false
103
+ }
104
+ OptionParser.new do |opts|
105
+ opts.banner = 'Usage: ' + File.basename(__FILE__) + ' [options] <csv file> <line number> [<number of lines>]'
106
+
107
+ opts.on('-h', '--help', 'Prints this help') do
108
+ puts opts
109
+ exit
110
+ end
111
+
112
+ opts.on('-a', '--all', 'Display all columns') do
113
+ options[:all_columns] = true
114
+ end
115
+ end.parse!
116
+
97
117
  file = File.open(ARGV[0], 'rb')
98
118
  lineno = ARGV[1].to_i
99
119
  number_of_lines = (ARGV[2] || 1).to_i
@@ -108,7 +128,7 @@ file.close
108
128
  cnt = 0
109
129
  data.each do |k, (v, status)|
110
130
  cnt += 1
111
- next if empty_column?(v)
131
+ next if !options[:all_columns] && empty_column?(v)
112
132
  if status == :ok
113
133
  puts sprintf(' %-3d %s: %s', cnt, k, v)
114
134
  else
@@ -2,7 +2,8 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.1.4'
5
+ s.version = '0.2.1'
6
+ s.licenses = ['MIT']
6
7
  s.summary = 'CSV Utils'
7
8
  s.description = 'Tools for debugging malformed CSV files'
8
9
  s.authors = ['Doug Youch']
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
11
12
  s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
12
13
  s.bindir = 'bin'
13
14
  s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
15
+
16
+ s.add_runtime_dependency 'inheritance-helper'
14
17
  end
@@ -0,0 +1,9 @@
1
+ require 'csv'
2
+
3
+ # Collection of tools for working with CSV files.
4
+ module CSVUtils
5
+ autoload :CSVExtender, 'csv_utils/csv_extender'
6
+ autoload :CSVOptions, 'csv_utils/csv_options'
7
+ autoload :CSVReport, 'csv_utils/csv_report'
8
+ autoload :CSVRow, 'csv_utils/csv_row'
9
+ end
@@ -0,0 +1,76 @@
1
# Utility class for appending data to a csv file.
module CSVUtils
  # Copies every row of +csv_file+ into +new_csv_file+, adding the extra
  # columns returned by the caller's block to the end of each row.
  class CSVExtender
    attr_reader :csv_file,
                :new_csv_file,
                :csv_options

    # @param csv_file [String] path of the CSV file to read
    # @param new_csv_file [String] path of the CSV file to write
    # @param csv_options [Hash] symbol-keyed options handed to CSV.open for
    #   both the source and the destination file
    def initialize(csv_file, new_csv_file, csv_options = {})
      @csv_file = csv_file
      @new_csv_file = new_csv_file
      @csv_options = csv_options
    end

    # Extends the file one row at a time.
    #
    # @param additional_headers [Array, nil] headers for the new columns; when
    #   nil no header row is consumed and the first row is treated as data
    # @yield [row, current_headers] called for each data row
    # @yieldreturn [Array] the values to add to the end of that row
    def append(additional_headers)
      process(additional_headers) do |current_headers|
        while (row = src.shift)
          additional_columns = yield row, current_headers
          dest << (row + additional_columns)
        end
      end
    end

    # Extends the file in groups of up to +batch_size+ rows.
    #
    # @param additional_headers [Array, nil] see #append
    # @param batch_size [Integer] maximum number of rows yielded at once
    # @yield [batch, current_headers] called for each batch of rows
    # @yieldreturn [Array<Array>] one array of extra values per row in the
    #   batch, in the same order as the batch
    def append_in_batches(additional_headers, batch_size = 1_000)
      process(additional_headers) do |current_headers|
        batch = []

        flush_batch = lambda do
          additional_rows = yield batch, current_headers

          batch.each_with_index do |row, idx|
            dest << (row + additional_rows[idx])
          end

          # Start a new array instead of clearing, in case the caller kept a
          # reference to the batch it was given.
          batch = []
        end

        while (row = src.shift)
          batch << row

          flush_batch.call if batch.size >= batch_size
        end

        # Write out whatever is left over after the last full batch.
        flush_batch.call unless batch.empty?
      end
    end

    # Writes the header row (if any), yields the source headers to the block,
    # and always closes both files afterwards, even when the block raises.
    #
    # @param additional_headers [Array, nil] see #append
    # @yield [current_headers] the header row read from the source, or nil
    # @return [nil]
    def process(additional_headers)
      current_headers = append_headers(additional_headers)

      yield current_headers

      nil
    ensure
      close
    end

    # Lazily opened reader for the source file.
    def src
      # Options must be splatted: CSV.open takes them as keyword arguments and
      # Ruby 3 no longer converts a trailing positional Hash.
      @src ||= CSV.open(csv_file, 'rb', **csv_options)
    end

    # Lazily opened writer for the destination file.
    def dest
      @dest ||= CSV.open(new_csv_file, 'wb', **csv_options)
    end

    # Closes whichever of the two files have been opened so far. Files that
    # were never opened are left alone rather than being opened just to be
    # closed again.
    def close
      @src&.close
      @dest&.close
    end

    private

    # Reads the source header row and writes it, extended with
    # +additional_headers+, to the destination.
    #
    # @return [Array, nil] the source headers, or nil when +additional_headers+
    #   is nil (no header row is read or written in that case)
    def append_headers(additional_headers)
      return nil unless additional_headers

      current_headers = src.shift
      dest << (current_headers + additional_headers)
      current_headers
    end
  end
end
@@ -0,0 +1,87 @@
1
# Auto detect a csv files options
module CSVUtils
  # Inspects the first line of a CSV file to work out its column separator,
  # row separator, byte order mark, character encoding and column count.
  class CSVOptions
    # this list is from https://en.wikipedia.org/wiki/Byte_order_mark
    BYTE_ORDER_MARKS = {
      "\xEF\xBB\xBF".b => 'UTF-8',
      "\xFE\xFF".b => 'UTF-16',
      "\xFF\xFE".b => 'UTF-16',
      "\x00\x00\xFE\xFF".b => 'UTF-32',
      "\xFF\xFE\x00\x00".b => 'UTF-32'
    }.freeze

    # Candidate column separators, tried in this order.
    COL_SEPARATORS = [
      "\x02",
      "\t",
      '|',
      ','
    ].freeze

    # Candidate row separators; "\r\n" is listed first so it is not mistaken
    # for a bare "\n".
    ROW_SEPARATORS = [
      "\r\n",
      "\n",
      "\r"
    ].freeze

    attr_reader :columns,
                :byte_order_mark,
                :encoding,
                :col_separator,
                :row_separator

    # @param io [String, #readline] a file path, or an object whose #readline
    #   returns the first line of the data
    # @raise [EOFError] propagated from #readline when there is no first line
    def initialize(io)
      line =
        if io.is_a?(String)
          File.open(io, 'rb', &:readline)
        else
          io.readline
        end

      @col_separator = auto_detect_col_sep(line)
      @row_separator = auto_detect_row_sep(line)
      @byte_order_mark = get_byte_order_mark(line)
      @encoding = get_character_encoding(@byte_order_mark)
      @columns = get_number_of_columns(line) if @col_separator
    end

    # @return [Boolean] true when both a column and a row separator were found
    def valid?
      !(@col_separator.nil? || @row_separator.nil?)
    end

    # @return [String, nil] the first entry of COL_SEPARATORS found in +line+
    def auto_detect_col_sep(line)
      COL_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # @return [String, nil] the first entry of ROW_SEPARATORS found in +line+
    def auto_detect_row_sep(line)
      ROW_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # Splits +line+ into header names, without the row terminator or a leading
    # byte order mark.
    #
    # @return [Array<String>]
    def get_headers(line)
      # Without this the last header would carry the line ending.
      line = line.chomp(row_separator) if row_separator
      # Limit -1 keeps trailing empty headers so the column count stays right.
      headers = line.split(col_separator, -1)
      headers[0] = strip_byte_order_marks(headers[0]) unless headers.empty?
      headers
    end

    # @return [Integer] number of columns in +line+
    def get_number_of_columns(line)
      get_headers(line).size
    end

    # @return [String, nil] the BYTE_ORDER_MARKS key that +line+ starts with
    def get_byte_order_mark(line)
      # Compare raw bytes so the check works whatever encoding the line is
      # tagged with.
      bytes = line.b
      # Longest marks first: the UTF-32 LE mark begins with the UTF-16 LE mark
      # and would otherwise never be reported.
      BYTE_ORDER_MARKS.keys
                      .sort_by { |bom| -bom.bytesize }
                      .find { |bom| bytes.start_with?(bom) }
    end

    # @return [String] encoding name for +bom+, 'UTF-8' when there is no mark
    def get_character_encoding(bom)
      BYTE_ORDER_MARKS[bom] || 'UTF-8'
    end

    # Removes the detected byte order mark from the start of +header+.
    #
    # @return [String, nil] +header+ without the mark; unchanged when no mark
    #   was detected or +header+ does not start with it
    def strip_byte_order_marks(header)
      return header unless @byte_order_mark && header
      return header unless header.b.start_with?(@byte_order_mark)

      # byteslice keeps the header's own encoding.
      header.byteslice(@byte_order_mark.bytesize..-1)
    end
  end
end
@@ -0,0 +1,40 @@
1
# Builds a csv file from csv rows
module CSVUtils
  # Thin writer that accepts either plain arrays or objects that can describe
  # themselves as a row (anything responding to #to_a / #csv_headers).
  class CSVReport
    attr_reader :csv,
                :must_close

    # @param csv [String, #<<] a file path to open, or an already open
    #   CSV-like object that rows are pushed onto
    # @param csv_options [Hash] symbol-keyed options for CSV.open; the optional
    #   :mode entry selects the file mode (default 'wb'). Only used when +csv+
    #   is a path. The hash is not modified.
    # @yield [report] when a block is given the report is generated
    #   immediately, see #generate
    def initialize(csv, csv_options = {}, &block)
      @csv =
        if csv.is_a?(String)
          @must_close = true
          # Work on a copy so the caller's hash keeps its :mode entry.
          open_options = csv_options.dup
          mode = open_options.delete(:mode) || 'wb'
          # Splat: CSV.open takes its options as keyword arguments.
          CSV.open(csv, mode, **open_options)
        else
          @must_close = false
          csv
        end

      generate(&block) if block
    end

    # Yields the report to the block and then closes the underlying file if
    # this object opened it, also when the block raises.
    #
    # @yield [report] self
    # @return [nil]
    def generate
      yield self

      nil
    ensure
      @csv.close if @must_close
    end

    # Writes one row.
    #
    # @param csv_row [Array, #to_a] the row, or an object converted via #to_a
    # @return [Object] whatever the underlying csv object returns from #<<
    def append(csv_row)
      @csv << (csv_row.is_a?(Array) ? csv_row : csv_row.to_a)
    end
    alias << append

    # Writes a header row.
    #
    # @param csv_row [Array, #csv_headers] the headers, or an object that
    #   provides them via #csv_headers
    def add_headers(csv_row)
      append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require 'inheritance-helper'
2
+
3
module CSVUtils
  # Mixin that lets a class declare, column by column, how its instances are
  # turned into a CSV row.
  module CSVRow
    # Adds the class-level DSL to the including class.
    def self.included(base)
      base.extend InheritanceHelper::Methods
      base.extend ClassMethods
    end

    # Class-level DSL made available to including classes.
    module ClassMethods
      # Column definitions keyed by the name given to csv_column. Starts empty;
      # csv_column registers entries through add_value_to_class_method.
      def csv_columns
        {}
      end

      # Declares a column.
      #
      # @param header [Symbol, String] column key; also the default header text
      #   and the default method called on the instance to get the value
      # @param options [Hash] optional :header (text), :proc (evaluated in the
      #   instance) or :method (method name). The hash is not modified.
      # @yield optional block evaluated in the instance to produce the value;
      #   takes precedence over options[:proc]
      def csv_column(header, options = {}, &block)
        # Copy first: filling defaults into the caller's hash would make a hash
        # shared between several csv_column calls keep the first column's
        # :header and :method.
        column = options.dup
        column[:header] ||= header.to_s

        if block
          column[:proc] = block
        elsif column[:proc].nil?
          column[:method] ||= header
        end

        # NOTE(review): add_value_to_class_method comes from the
        # inheritance-helper gem; presumably it redefines csv_columns to return
        # the current hash merged with this entry - confirm against the gem.
        add_value_to_class_method(:csv_columns, header => column)
      end
    end

    # @return [Array] header text of every declared column
    def csv_headers
      self.class.csv_columns.values.map { |column_options| csv_column_header(column_options) }
    end

    # @return [Array] this instance's value for every declared column
    def csv_row
      self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
    end
    alias_method :to_a, :csv_row

    private

    # @return [Object] the :header entry of a column definition
    def csv_column_header(column_options)
      column_options[:header]
    end

    # Evaluates the column's :proc in this instance, or calls its :method.
    def csv_column_value(column_options)
      if column_options[:proc]
        instance_eval(&column_options[:proc])
      else
        # send (not public_send) so a column may be backed by a private method.
        send(column_options[:method])
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ $LOAD_PATH << File.expand_path('../lib', __dir__)
5
+ require 'csv-utils'
6
+ require 'irb'
7
+ IRB.start(__FILE__)
metadata CHANGED
@@ -1,33 +1,60 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-05-14 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2020-07-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: inheritance-helper
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  description: Tools for debugging malformed CSV files
14
28
  email: dougyouch@gmail.com
15
29
  executables:
30
+ - csv-change-eol
16
31
  - csv-find-error
17
32
  - csv-readline
18
33
  extensions: []
19
34
  extra_rdoc_files: []
20
35
  files:
21
36
  - ".gitignore"
37
+ - ".ruby-gemset"
38
+ - ".ruby-version"
39
+ - Gemfile
40
+ - Gemfile.lock
22
41
  - LICENSE
23
42
  - README.md
43
+ - bin/csv-change-eol
24
44
  - bin/csv-find-error
25
45
  - bin/csv-readline
26
46
  - csv-utils.gemspec
47
+ - lib/csv-utils.rb
48
+ - lib/csv_utils/csv_extender.rb
49
+ - lib/csv_utils/csv_options.rb
50
+ - lib/csv_utils/csv_report.rb
51
+ - lib/csv_utils/csv_row.rb
52
+ - script/console
27
53
  homepage: https://github.com/dougyouch/csv-utils
28
- licenses: []
54
+ licenses:
55
+ - MIT
29
56
  metadata: {}
30
- post_install_message:
57
+ post_install_message:
31
58
  rdoc_options: []
32
59
  require_paths:
33
60
  - lib
@@ -42,8 +69,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
69
  - !ruby/object:Gem::Version
43
70
  version: '0'
44
71
  requirements: []
45
- rubygems_version: 3.0.3
46
- signing_key:
72
+ rubygems_version: 3.0.8
73
+ signing_key:
47
74
  specification_version: 4
48
75
  summary: CSV Utils
49
76
  test_files: []