csv-utils 0.1.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +57 -0
- data/csv-utils.gemspec +4 -1
- data/lib/csv-utils.rb +12 -0
- data/lib/csv_utils/csv_extender.rb +63 -0
- data/lib/csv_utils/csv_options.rb +87 -0
- data/lib/csv_utils/csv_report.rb +41 -0
- data/lib/csv_utils/csv_row.rb +57 -0
- data/lib/csv_utils/csv_sort.rb +112 -0
- data/lib/csv_utils/csv_transformer.rb +119 -0
- data/lib/csv_utils/csv_wrapper.rb +47 -0
- data/script/console +7 -0
- metadata +35 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
|
4
|
+
data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
|
7
|
+
data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
csv-utils
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.3
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.4.1)
|
5
|
+
diff-lcs (1.3)
|
6
|
+
docile (1.3.2)
|
7
|
+
inheritance-helper (0.1.5)
|
8
|
+
parallel (1.19.2)
|
9
|
+
parser (2.7.1.4)
|
10
|
+
ast (~> 2.4.1)
|
11
|
+
rainbow (3.0.0)
|
12
|
+
rake (13.0.1)
|
13
|
+
regexp_parser (1.7.1)
|
14
|
+
rexml (3.2.4)
|
15
|
+
rspec (3.9.0)
|
16
|
+
rspec-core (~> 3.9.0)
|
17
|
+
rspec-expectations (~> 3.9.0)
|
18
|
+
rspec-mocks (~> 3.9.0)
|
19
|
+
rspec-core (3.9.2)
|
20
|
+
rspec-support (~> 3.9.3)
|
21
|
+
rspec-expectations (3.9.2)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.9.0)
|
24
|
+
rspec-mocks (3.9.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-support (3.9.3)
|
28
|
+
rubocop (0.86.0)
|
29
|
+
parallel (~> 1.10)
|
30
|
+
parser (>= 2.7.0.1)
|
31
|
+
rainbow (>= 2.2.2, < 4.0)
|
32
|
+
regexp_parser (>= 1.7)
|
33
|
+
rexml
|
34
|
+
rubocop-ast (>= 0.0.3, < 1.0)
|
35
|
+
ruby-progressbar (~> 1.7)
|
36
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
37
|
+
rubocop-ast (0.0.3)
|
38
|
+
parser (>= 2.7.0.1)
|
39
|
+
ruby-progressbar (1.10.1)
|
40
|
+
simplecov (0.18.5)
|
41
|
+
docile (~> 1.1)
|
42
|
+
simplecov-html (~> 0.11)
|
43
|
+
simplecov-html (0.12.2)
|
44
|
+
unicode-display_width (1.7.0)
|
45
|
+
|
46
|
+
PLATFORMS
|
47
|
+
ruby
|
48
|
+
|
49
|
+
DEPENDENCIES
|
50
|
+
inheritance-helper
|
51
|
+
rake
|
52
|
+
rspec
|
53
|
+
rubocop
|
54
|
+
simplecov
|
55
|
+
|
56
|
+
BUNDLED WITH
|
57
|
+
1.17.3
|
data/csv-utils.gemspec
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'csv-utils'
|
5
|
-
s.version = '0.
|
5
|
+
s.version = '0.3.0'
|
6
|
+
s.licenses = ['MIT']
|
6
7
|
s.summary = 'CSV Utils'
|
7
8
|
s.description = 'Tools for debugging malformed CSV files'
|
8
9
|
s.authors = ['Doug Youch']
|
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
|
|
11
12
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
12
13
|
s.bindir = 'bin'
|
13
14
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
15
|
+
|
16
|
+
s.add_runtime_dependency 'inheritance-helper'
|
14
17
|
end
|
data/lib/csv-utils.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Collection of tools for working with CSV files.
|
4
|
+
module CSVUtils
# Each constant below is registered with Module#autoload: the named file is
# required the first time the constant is referenced, not when this file loads.
|
5
|
+
autoload :CSVExtender, 'csv_utils/csv_extender'
|
6
|
+
autoload :CSVOptions, 'csv_utils/csv_options'
|
7
|
+
autoload :CSVReport, 'csv_utils/csv_report'
|
8
|
+
autoload :CSVRow, 'csv_utils/csv_row'
|
9
|
+
autoload :CSVSort, 'csv_utils/csv_sort'
|
10
|
+
autoload :CSVTransformer, 'csv_utils/csv_transformer'
|
11
|
+
autoload :CSVWrapper, 'csv_utils/csv_wrapper'
|
12
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Utility class for appending data to a csv file.
|
2
|
+
module CSVUtils
  # Copies the rows of a source CSV into a destination CSV while appending
  # extra columns to every row.
  #
  # +src_csv+ and +dest_csv+ are each handed to CSVUtils::CSVWrapper together
  # with +csv_options+ (opened with modes 'rb' and 'wb' respectively).
  class CSVExtender
    def initialize(src_csv, dest_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
      @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
    end

    # Appends columns one row at a time.
    #
    # When +additional_headers+ is given, the first source row is treated as
    # the header row and written out with +additional_headers+ appended. When
    # it is nil no header handling happens and every source row is data.
    #
    # Yields each row and the source headers (nil when +additional_headers+ is
    # nil); the block must return an Array of values to append to that row.
    def append(additional_headers)
      process(additional_headers) do |current_headers|
        while (row = @src_csv.shift)
          @dest_csv << (row + yield(row, current_headers))
        end
      end
    end

    # Same as #append but yields up to +batch_size+ rows at once.
    #
    # The block receives the batch (Array of rows) and the source headers and
    # must return one Array of additional values per row, in the same order.
    def append_in_batches(additional_headers, batch_size = 1_000)
      process(additional_headers) do |current_headers|
        batch = []

        while (row = @src_csv.shift)
          batch << row
          next if batch.size < batch_size

          write_batch(batch, yield(batch, current_headers))
          batch = []
        end

        # flush the final, partially filled batch
        write_batch(batch, yield(batch, current_headers)) unless batch.empty?
      end
    end

    private

    # Writes the headers, yields them to the row-processing block and always
    # closes both files afterwards, even when the block raises.
    def process(additional_headers)
      yield append_headers(additional_headers)
      nil
    ensure
      close
    end

    # Writes every row of +batch+ extended with its entry from +additional_rows+.
    def write_batch(batch, additional_rows)
      batch.each_with_index do |row, idx|
        @dest_csv << (row + additional_rows[idx])
      end
    end

    def close
      @src_csv.close
    ensure
      # still release the destination when closing the source fails
      @dest_csv.close
    end

    # Returns the source headers, or nil when no header handling was requested.
    def append_headers(additional_headers)
      return nil unless additional_headers

      # an empty source has no header row; fall back to an empty one instead of
      # failing on nil + Array
      current_headers = @src_csv.shift || []
      @dest_csv << (current_headers + additional_headers)
      current_headers
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Auto-detects a CSV file's options
|
2
|
+
module CSVUtils
  # Inspects the first line of a CSV file to detect its column separator, row
  # separator, byte order mark, character encoding and number of columns.
  class CSVOptions
    # this list is from https://en.wikipedia.org/wiki/Byte_order_mark
    BYTE_ORDER_MARKS = {
      "\xEF\xBB\xBF".b => 'UTF-8',
      "\xFE\xFF".b => 'UTF-16',
      "\xFF\xFE".b => 'UTF-16',
      "\x00\x00\xFE\xFF".b => 'UTF-32',
      "\xFF\xFE\x00\x00".b => 'UTF-32'
    }.freeze

    # Longest marks first: the UTF-32LE mark starts with the UTF-16LE mark, so
    # the shorter one must not be tested before the longer one.
    BYTE_ORDER_MARKS_BY_LENGTH = BYTE_ORDER_MARKS.keys.sort_by { |bom| -bom.bytesize }.freeze

    # Candidate column separators, in order of precedence.
    COL_SEPARATORS = [
      "\x02",
      "\t",
      '|',
      ','
    ].freeze

    # Candidate row separators; "\r\n" is listed before its two halves.
    ROW_SEPARATORS = [
      "\r\n",
      "\n",
      "\r"
    ].freeze

    attr_reader :columns,
                :byte_order_mark,
                :encoding,
                :col_separator,
                :row_separator

    # io - a file path (String) or any object responding to #readline.
    #
    # An empty input yields an instance for which #valid? is false instead of
    # raising EOFError.
    def initialize(io)
      line = read_first_line(io)

      @col_separator = auto_detect_col_sep(line)
      @row_separator = auto_detect_row_sep(line)
      @byte_order_mark = get_byte_order_mark(line)
      @encoding = get_character_encoding(@byte_order_mark)
      @columns = get_number_of_columns(line) if @col_separator
    end

    # True when both a column and a row separator were detected.
    def valid?
      !(@col_separator.nil? || @row_separator.nil?)
    end

    # Returns the first known column separator contained in +line+, or nil.
    def auto_detect_col_sep(line)
      COL_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # Returns the first known row separator contained in +line+, or nil.
    def auto_detect_row_sep(line)
      ROW_SEPARATORS.find { |sep| line.include?(sep) }
    end

    # Splits +line+ on the detected column separator, without the trailing row
    # separator and without a leading byte order mark.
    def get_headers(line)
      line = line.chomp(row_separator) if row_separator
      # limit -1 keeps trailing empty columns that chomp would otherwise expose
      # to String#split's default trimming
      headers = line.split(col_separator, -1)
      headers[0] = strip_byte_order_marks(headers[0]) unless headers.empty?
      headers
    end

    def get_number_of_columns(line)
      get_headers(line).size
    end

    # Returns the byte order mark +line+ starts with, or nil.
    def get_byte_order_mark(line)
      # compare raw bytes so the check works whatever encoding the line carries
      bytes = line.b
      BYTE_ORDER_MARKS_BY_LENGTH.find { |bom| bytes.start_with?(bom) }
    end

    # Encoding name for +bom+; 'UTF-8' when there is no byte order mark.
    def get_character_encoding(bom)
      BYTE_ORDER_MARKS[bom] || 'UTF-8'
    end

    # Removes the detected byte order mark from the start of +header+.
    def strip_byte_order_marks(header)
      return header unless @byte_order_mark && header.b.start_with?(@byte_order_mark)

      # byteslice keeps the header's own encoding
      header.byteslice(@byte_order_mark.bytesize..-1)
    end

    private

    # Reads the first line from a path or an IO-like object; '' when empty.
    def read_first_line(io)
      io.is_a?(String) ? File.open(io, 'rb', &:readline) : io.readline
    rescue EOFError
      ''
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Builds a csv file from csv rows
|
2
|
+
module CSVUtils
  # Builds a csv file from csv rows.
  #
  # csv         - a file path (String), which is opened and later closed by the
  #               report, or an already opened object responding to #<<, which
  #               is left open.
  # headers     - optional header row, see #add_headers.
  # csv_options - options for CSV.open; the optional :mode entry selects the
  #               file mode (default 'wb'). The hash is not modified.
  #
  # When a block is given the report is generated immediately, see #generate.
  class CSVReport
    attr_reader :csv,
                :must_close

    def initialize(csv, headers = nil, csv_options = {}, &block)
      @must_close = csv.is_a?(String)
      @csv = @must_close ? open_csv(csv, csv_options) : csv

      generate(headers, &block) if block
    end

    # Writes +headers+ (when given), yields the report so rows can be appended
    # and finally closes the csv if it was opened by this report, even when
    # the block raises.
    def generate(headers = nil)
      add_headers(headers) if headers
      yield self
    ensure
      @csv.close if @must_close
    end

    # Appends a row; anything that is not an Array is converted with #to_a.
    def append(csv_row)
      @csv << (csv_row.is_a?(Array) ? csv_row : csv_row.to_a)
    end
    alias << append

    # Writes the header row, given either as an Array or as an object
    # responding to #csv_headers.
    def add_headers(csv_row)
      append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
    end

    private

    # Opens +path+ for writing. Options are passed as keywords, which CSV.open
    # requires on Ruby 3, and are copied first so the caller's hash is untouched.
    def open_csv(path, csv_options)
      options = csv_options.dup
      mode = options.delete(:mode) || 'wb'
      CSV.open(path, mode, **options)
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'inheritance-helper'
|
2
|
+
|
3
|
+
module CSVUtils
  # Mixin that lets a class declare, column by column, how its instances are
  # turned into a csv row.
  #
  # Including the module extends the class with InheritanceHelper::Methods and
  # with ClassMethods.
  module CSVRow
    def self.included(base)
      base.extend InheritanceHelper::Methods
      base.extend ClassMethods
    end

    # Class level column declarations.
    module ClassMethods
      # Declared columns keyed by header; empty until csv_column is called.
      def csv_columns
        {}
      end

      # Declares a column.
      #
      # header  - column identifier; also the default header text (via #to_s)
      #           and the default method called to obtain the value.
      # options - optional :header (text), :proc (evaluated in the instance)
      #           or :method (name sent to the instance).
      # block   - takes precedence over options[:proc].
      #
      # The options are copied before defaults are filled in, so one hash can
      # be reused for several columns without leaking the first column's
      # defaults into the next.
      def csv_column(header, options = {}, &block)
        column_options = options.dup
        column_options[:header] ||= header.to_s

        if block
          column_options[:proc] = block
        elsif column_options[:proc].nil?
          column_options[:method] ||= header
        end

        add_value_to_class_method(:csv_columns, header => column_options)
      end

      # Header texts of all declared columns.
      def csv_headers
        csv_columns.values.map { |column_options| csv_column_header(column_options) }
      end

      private

      def csv_column_header(column_options)
        column_options[:header]
      end
    end

    # Values of all declared columns for this instance.
    def csv_row
      self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
    end
    alias_method :to_a, :csv_row

    def csv_headers
      self.class.csv_headers
    end

    private

    # Evaluates the column's proc in the context of the instance, or sends the
    # configured method name to it.
    def csv_column_value(column_options)
      if column_options[:proc]
        instance_eval(&column_options[:proc])
      else
        send(column_options[:method])
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
# Utility class for sorting the rows for a csv file
|
4
|
+
module CSVUtils
  # Sorts the rows of a csv file using an external merge sort.
  #
  # The source is read in batches; each batch is sorted in memory and written
  # to a "<new_csv_file>.part.N" file. The part files are then merged two at a
  # time into "<new_csv_file>.merge.N" files until a single file remains, which
  # is moved to +new_csv_file+.
  class CSVSort
    attr_reader :csv_file,
                :new_csv_file,
                :has_headers,
                :csv_options,
                :headers

    # csv_file     - path of the csv file to sort
    # new_csv_file - path the sorted csv is written to
    # has_headers  - when true the first row is kept as header row
    # csv_options  - options passed to CSV.open for every file
    def initialize(csv_file, new_csv_file, has_headers = true, csv_options = {})
      @csv_file = csv_file
      @new_csv_file = new_csv_file
      @has_headers = has_headers
      @csv_options = csv_options
      @csv_part_files = []
    end

    # Sorts the file. The block compares two rows like <=>; without a block
    # rows are compared with Array#<=>.
    #
    # batch_size - number of rows sorted in memory per part file
    #
    # Intermediate files are removed even when sorting fails.
    def sort(batch_size = 100_000, &block)
      block ||= ->(row1, row2) { row1 <=> row2 }

      create_sorted_csv_part_files(batch_size, &block)
      merge_csv_part_files(&block)
    ensure
      # empty after a successful run; holds leftovers after a failure
      FileUtils.rm_f(@csv_part_files)
      @csv_part_files.clear
    end

    private

    # Merges two sorted csv files into +dest_csv_file+, yielding pairs of rows
    # to the comparison block.
    def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
      CSV.open(src_csv_file1, 'rb', **csv_options) do |src1|
        CSV.open(src_csv_file2, 'rb', **csv_options) do |src2|
          CSV.open(dest_csv_file, 'wb', **csv_options) do |dest|
            if @headers
              dest << @headers
              # every part file starts with a copy of the headers
              src1.shift
              src2.shift
            end

            row1 = src1.shift
            row2 = src2.shift

            while row1 || row2
              # take from the first file when the second is exhausted or when
              # its row sorts first (ties go to the first file)
              if row2.nil? || (row1 && yield(row1, row2) <= 0)
                dest << row1
                row1 = src1.shift
              else
                dest << row2
                row2 = src2.shift
              end
            end
          end
        end
      end
    end

    # Splits the source into sorted part files of at most +batch_size+ rows.
    def create_sorted_csv_part_files(batch_size, &block)
      CSV.open(csv_file, 'rb', **csv_options) do |src|
        @headers = src.shift if has_headers

        batch = []
        while (row = src.shift)
          batch << row
          next if batch.size < batch_size

          write_sorted_part_file(batch, &block)
          batch = []
        end

        write_sorted_part_file(batch, &block) unless batch.empty?
      end
    end

    # Sorts +batch+ in place and writes it, with headers, to a new part file.
    def write_sorted_part_file(batch, &block)
      batch.sort!(&block)
      @csv_part_files << "#{new_csv_file}.part.#{@csv_part_files.size}"
      CSV.open(@csv_part_files.last, 'wb', **csv_options) do |csv|
        csv << @headers if @headers
        batch.each { |row| csv << row }
      end
    end

    # Merges the part files pairwise until one is left and moves it into place.
    def merge_csv_part_files(&block)
      # a source without data rows produces no part files at all
      return write_headers_only_file if @csv_part_files.empty?

      file_merge_cnt = 0

      while @csv_part_files.size > 1
        file_merge_cnt += 1

        csv_part_file1 = @csv_part_files.shift
        csv_part_file2 = @csv_part_files.shift
        @csv_part_files << "#{new_csv_file}.merge.#{file_merge_cnt}"

        begin
          merge_sort_csv_files(csv_part_file1, csv_part_file2, @csv_part_files.last, &block)
        ensure
          FileUtils.rm_f([csv_part_file1, csv_part_file2])
        end
      end

      FileUtils.mv(@csv_part_files.pop, new_csv_file)
    end

    # Writes a result that contains the header row only (or nothing at all).
    def write_headers_only_file
      CSV.open(new_csv_file, 'wb', **csv_options) do |csv|
        csv << @headers if @headers
      end
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# Transforms a CSV given a series of steps
|
2
|
+
module CSVUtils
  # Transforms a CSV given a series of steps.
  #
  # Steps are registered with the chainable methods below (each returns self)
  # and run, in registration order, on every batch of rows by #process. Every
  # step remembers the headers that were current when it was registered and
  # passes them to its block.
  class CSVTransformer
    attr_reader :headers

    # src_csv and dest_csv are handed to CSVUtils::CSVWrapper together with
    # csv_options (opened with modes 'rb' and 'wb' respectively).
    def initialize(src_csv, dest_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
      @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
    end

    # Consumes the next source row and uses it as headers.
    def read_headers
      @headers = @src_csv.shift
      self
    end

    # The block receives (batch, headers); its result is passed as third
    # argument to the blocks of the row steps.
    def additional_data(&block)
      steps << [:additional_data, @headers, block]
      self
    end

    # Keeps rows for which the block returns a truthy value.
    def select(&block)
      steps << [:select, @headers, block]
      self
    end

    # Drops rows for which the block returns a truthy value.
    def reject(&block)
      steps << [:reject, @headers, block]
      self
    end

    # Replaces each row with the block's result; +new_headers+ become the
    # current headers for later steps and for the output.
    def map(new_headers, &block)
      steps << [:map, @headers, block]
      @headers = new_headers
      self
    end

    # Appends the Array returned by the block to each row.
    #
    # +additional_headers+ are appended to the current headers. The result has
    # no header row when either +additional_headers+ or the current headers
    # are missing.
    def append(additional_headers, &block)
      steps << [:append, @headers, block]

      @headers =
        if @headers && additional_headers
          @headers + additional_headers
        end

      self
    end

    # Calls the block for each row without changing the rows.
    def each(&block)
      steps << [:each, @headers, block]
      self
    end

    def set_headers(headers)
      @headers = headers
      self
    end

    # Writes the headers (if any), then reads the source in batches of
    # +batch_size+ rows, runs all steps on each batch and writes the resulting
    # rows. Both files are closed afterwards, even when a step raises.
    def process(batch_size = 10_000, &_block)
      @dest_csv << @headers if @headers

      batch = []
      while (row = @src_csv.shift)
        batch << row
        next if batch.size < batch_size

        flush_batch(batch)
        batch = []
      end

      flush_batch(batch) unless batch.empty?
      nil
    ensure
      close
    end

    private

    def steps
      @steps ||= []
    end

    # Runs every registered step on +batch+ and writes the remaining rows.
    def flush_batch(batch)
      steps.each do |step_type, current_headers, proc|
        batch = process_step(step_type, current_headers, batch, &proc)
      end

      batch.each { |row| @dest_csv << row }
    end

    def close
      @src_csv.close
    ensure
      # still release the destination when closing the source fails
      @dest_csv.close
    end

    # Applies one step to +batch+ in place and returns the batch.
    def process_step(step_type, current_headers, batch, &block)
      case step_type
      when :select
        batch.select! { |row| block.call(row, current_headers, @additional_data) }
      when :reject
        batch.reject! { |row| block.call(row, current_headers, @additional_data) }
      when :map
        batch.map! { |row| block.call(row, current_headers, @additional_data) }
      when :append
        batch.map! { |row| row + block.call(row, current_headers, @additional_data) }
      when :additional_data
        @additional_data = block.call(batch, current_headers)
      when :each
        batch.each { |row| block.call(row, current_headers, @additional_data) }
      end

      # select!/reject! return nil when nothing changed, so return the batch itself
      batch
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Wraps a CSV object, if wrapper opens the csv file it will close it
|
2
|
+
module CSVUtils
  # Wraps a CSV object. When the wrapper is given a file path it opens the csv
  # itself and closes it on #close; an object passed in by the caller is used
  # as is and never closed by the wrapper.
  class CSVWrapper
    attr_reader :csv

    # csv         - file path (String) or an object responding to #<< / #shift
    # mode        - file mode used when +csv+ is a path
    # csv_options - Hash of options for CSV.open, used when +csv+ is a path
    def initialize(csv, mode, csv_options)
      open(csv, mode, csv_options)
    end

    # Without a block returns the new wrapper. With a block yields the wrapper,
    # closes it afterwards (even when the block raises) and returns the
    # block's value.
    def self.open(file, mode, csv_options = {})
      wrapper = new(file, mode, csv_options)
      return wrapper unless block_given?

      begin
        yield wrapper
      ensure
        wrapper.close
      end
    end

    # Sets up the wrapped csv; see #initialize for the arguments.
    def open(csv, mode, csv_options)
      @close_when_done = csv.is_a?(String)
      # CSV.open takes its options as keywords; a positional Hash is rejected
      # on Ruby 3
      @csv = @close_when_done ? CSV.open(csv, mode, **csv_options) : csv
    end

    def <<(row)
      csv << row
    end

    def shift
      csv.shift
    end

    # Closes the csv only when it was opened by this wrapper.
    def close
      csv.close if close_when_done?
    end

    private

    def close_when_done?
      @close_when_done
    end
  end
end
|
data/script/console
ADDED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
12
|
-
dependencies:
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: inheritance-helper
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
description: Tools for debugging malformed CSV files
|
14
28
|
email: dougyouch@gmail.com
|
15
29
|
executables:
|
@@ -20,16 +34,30 @@ extensions: []
|
|
20
34
|
extra_rdoc_files: []
|
21
35
|
files:
|
22
36
|
- ".gitignore"
|
37
|
+
- ".ruby-gemset"
|
38
|
+
- ".ruby-version"
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
23
41
|
- LICENSE
|
24
42
|
- README.md
|
25
43
|
- bin/csv-change-eol
|
26
44
|
- bin/csv-find-error
|
27
45
|
- bin/csv-readline
|
28
46
|
- csv-utils.gemspec
|
47
|
+
- lib/csv-utils.rb
|
48
|
+
- lib/csv_utils/csv_extender.rb
|
49
|
+
- lib/csv_utils/csv_options.rb
|
50
|
+
- lib/csv_utils/csv_report.rb
|
51
|
+
- lib/csv_utils/csv_row.rb
|
52
|
+
- lib/csv_utils/csv_sort.rb
|
53
|
+
- lib/csv_utils/csv_transformer.rb
|
54
|
+
- lib/csv_utils/csv_wrapper.rb
|
55
|
+
- script/console
|
29
56
|
homepage: https://github.com/dougyouch/csv-utils
|
30
|
-
licenses:
|
57
|
+
licenses:
|
58
|
+
- MIT
|
31
59
|
metadata: {}
|
32
|
-
post_install_message:
|
60
|
+
post_install_message:
|
33
61
|
rdoc_options: []
|
34
62
|
require_paths:
|
35
63
|
- lib
|
@@ -45,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
45
73
|
version: '0'
|
46
74
|
requirements: []
|
47
75
|
rubygems_version: 3.0.8
|
48
|
-
signing_key:
|
76
|
+
signing_key:
|
49
77
|
specification_version: 4
|
50
78
|
summary: CSV Utils
|
51
79
|
test_files: []
|