csv-utils 0.1.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +57 -0
- data/csv-utils.gemspec +4 -1
- data/lib/csv-utils.rb +12 -0
- data/lib/csv_utils/csv_extender.rb +63 -0
- data/lib/csv_utils/csv_options.rb +87 -0
- data/lib/csv_utils/csv_report.rb +41 -0
- data/lib/csv_utils/csv_row.rb +57 -0
- data/lib/csv_utils/csv_sort.rb +112 -0
- data/lib/csv_utils/csv_transformer.rb +119 -0
- data/lib/csv_utils/csv_wrapper.rb +47 -0
- data/script/console +7 -0
- metadata +35 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98303ab9b2df05bc501c1c66b66a62be5ade9d79ab38a5b8bda8eb52d91b26cc
|
4
|
+
data.tar.gz: 8adfd2144220de2cc4f23136ee4eb7314a3c16eeac68be87e1dc19b1ac7dc350
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a2a2b2067a9ca06920b171230a122eba479c1f91af3919e2965eaec6d073fff34d544221a92cffaa1b9546078960aee0c9b9031e7b652368e975cff9b196214c
|
7
|
+
data.tar.gz: '0786cfb3e75771ccb68bfa0e2cba42994c7c04a5c8be14432ae6467425536e7dfd4a4ef33403ae5bd129eafd871a07077610c000a78762052e0b055192c0cc16'
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
csv-utils
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.6.3
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.4.1)
|
5
|
+
diff-lcs (1.3)
|
6
|
+
docile (1.3.2)
|
7
|
+
inheritance-helper (0.1.5)
|
8
|
+
parallel (1.19.2)
|
9
|
+
parser (2.7.1.4)
|
10
|
+
ast (~> 2.4.1)
|
11
|
+
rainbow (3.0.0)
|
12
|
+
rake (13.0.1)
|
13
|
+
regexp_parser (1.7.1)
|
14
|
+
rexml (3.2.4)
|
15
|
+
rspec (3.9.0)
|
16
|
+
rspec-core (~> 3.9.0)
|
17
|
+
rspec-expectations (~> 3.9.0)
|
18
|
+
rspec-mocks (~> 3.9.0)
|
19
|
+
rspec-core (3.9.2)
|
20
|
+
rspec-support (~> 3.9.3)
|
21
|
+
rspec-expectations (3.9.2)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.9.0)
|
24
|
+
rspec-mocks (3.9.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.9.0)
|
27
|
+
rspec-support (3.9.3)
|
28
|
+
rubocop (0.86.0)
|
29
|
+
parallel (~> 1.10)
|
30
|
+
parser (>= 2.7.0.1)
|
31
|
+
rainbow (>= 2.2.2, < 4.0)
|
32
|
+
regexp_parser (>= 1.7)
|
33
|
+
rexml
|
34
|
+
rubocop-ast (>= 0.0.3, < 1.0)
|
35
|
+
ruby-progressbar (~> 1.7)
|
36
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
37
|
+
rubocop-ast (0.0.3)
|
38
|
+
parser (>= 2.7.0.1)
|
39
|
+
ruby-progressbar (1.10.1)
|
40
|
+
simplecov (0.18.5)
|
41
|
+
docile (~> 1.1)
|
42
|
+
simplecov-html (~> 0.11)
|
43
|
+
simplecov-html (0.12.2)
|
44
|
+
unicode-display_width (1.7.0)
|
45
|
+
|
46
|
+
PLATFORMS
|
47
|
+
ruby
|
48
|
+
|
49
|
+
DEPENDENCIES
|
50
|
+
inheritance-helper
|
51
|
+
rake
|
52
|
+
rspec
|
53
|
+
rubocop
|
54
|
+
simplecov
|
55
|
+
|
56
|
+
BUNDLED WITH
|
57
|
+
1.17.3
|
data/csv-utils.gemspec
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'csv-utils'
|
5
|
-
s.version = '0.
|
5
|
+
s.version = '0.3.0'
|
6
|
+
s.licenses = ['MIT']
|
6
7
|
s.summary = 'CSV Utils'
|
7
8
|
s.description = 'Tools for debugging malformed CSV files'
|
8
9
|
s.authors = ['Doug Youch']
|
@@ -11,4 +12,6 @@ Gem::Specification.new do |s|
|
|
11
12
|
s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
12
13
|
s.bindir = 'bin'
|
13
14
|
s.executables = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
15
|
+
|
16
|
+
s.add_runtime_dependency 'inheritance-helper'
|
14
17
|
end
|
data/lib/csv-utils.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
# Collection of tools for working with CSV files; each component below is autoloaded from csv_utils/ on first reference.
|
4
|
+
module CSVUtils
|
5
|
+
autoload :CSVExtender, 'csv_utils/csv_extender'
|
6
|
+
autoload :CSVOptions, 'csv_utils/csv_options'
|
7
|
+
autoload :CSVReport, 'csv_utils/csv_report'
|
8
|
+
autoload :CSVRow, 'csv_utils/csv_row'
|
9
|
+
autoload :CSVSort, 'csv_utils/csv_sort'
|
10
|
+
autoload :CSVTransformer, 'csv_utils/csv_transformer'
|
11
|
+
autoload :CSVWrapper, 'csv_utils/csv_wrapper'
|
12
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module CSVUtils
  # Utility class for appending data to a csv file.
  #
  # Every row read from the source is written to the destination with extra
  # columns (computed by the caller's block) appended on the right.
  class CSVExtender
    # @param src_csv [String, #shift] path to read, or an already opened CSV-like object
    # @param dest_csv [String, #<<] path to write, or an already opened CSV-like object
    # @param csv_options [Hash] options handed to CSVUtils::CSVWrapper for both sides
    def initialize(src_csv, dest_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
      @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
    end

    # Appends columns one row at a time.
    #
    # @param additional_headers [Array, nil] header names for the new columns;
    #   nil means the source has no header row to consume
    # @yield [row, current_headers] the source row and the source header row (or nil)
    # @yieldreturn [Array] the extra columns for that row
    # @return [nil]
    def append(additional_headers)
      process(additional_headers) do |current_headers|
        while (row = @src_csv.shift)
          additional_columns = yield row, current_headers
          @dest_csv << (row + additional_columns)
        end
      end
    end

    # Appends columns in batches so the block can do bulk lookups.
    #
    # @param additional_headers [Array, nil] see {#append}
    # @param batch_size [Integer] number of source rows handed to the block at once
    # @yield [rows, current_headers] a batch of source rows and the source header row (or nil)
    # @yieldreturn [Array<Array>] extra columns, one entry per row, in the same order
    # @return [nil]
    def append_in_batches(additional_headers, batch_size = 1_000)
      process(additional_headers) do |current_headers|
        batch = []

        flush = lambda do
          additional_rows = yield batch, current_headers
          batch.each_with_index { |row, idx| @dest_csv << (row + additional_rows[idx]) }
          batch = []
        end

        while (row = @src_csv.shift)
          batch << row
          flush.call if batch.size >= batch_size
        end

        # write the final, partially filled batch
        flush.call unless batch.empty?
      end
    end

    private

    # Writes the header row, runs the caller's block and always closes both
    # files, even when the block raises.
    def process(additional_headers)
      current_headers = append_headers(additional_headers)
      yield current_headers
      nil
    ensure
      close
    end

    # Closes the destination even if closing the source fails.
    def close
      @src_csv.close
    ensure
      @dest_csv.close
    end

    # Consumes the source header row and writes the extended header row.
    # An empty source yields no header row, so only the additional headers
    # are written in that case.
    def append_headers(additional_headers)
      return nil unless additional_headers

      current_headers = Array(@src_csv.shift)
      @dest_csv << (current_headers + additional_headers)
      current_headers
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Auto detect a csv files options
module CSVUtils
  # Inspects the first line of a CSV file to guess its column separator,
  # row separator, byte order mark, character encoding and column count.
  class CSVOptions
    # this list is from https://en.wikipedia.org/wiki/Byte_order_mark
    # Keys are binary (ASCII-8BIT) strings so they can be compared byte-wise.
    BYTE_ORDER_MARKS = {
      "\xEF\xBB\xBF".b => 'UTF-8',
      "\xFE\xFF".b => 'UTF-16',
      "\xFF\xFE".b => 'UTF-16',
      "\x00\x00\xFE\xFF".b => 'UTF-32',
      "\xFF\xFE\x00\x00".b => 'UTF-32'
    }.freeze

    # Candidate column separators, in detection priority order.
    COL_SEPARATORS = [
      "\x02",
      "\t",
      '|',
      ','
    ].freeze

    # Candidate row separators; "\r\n" must be tested before its parts.
    ROW_SEPARATORS = [
      "\r\n",
      "\n",
      "\r"
    ].freeze

    attr_reader :columns,
                :byte_order_mark,
                :encoding,
                :col_separator,
                :row_separator

    # @param io [String, #readline] a file path (opened in binary mode) or an
    #   object whose #readline returns the first line
    def initialize(io)
      line = io.is_a?(String) ? File.open(io, 'rb', &:readline) : io.readline

      @col_separator = auto_detect_col_sep(line)
      @row_separator = auto_detect_row_sep(line)
      @byte_order_mark = get_byte_order_mark(line)
      @encoding = get_character_encoding(@byte_order_mark)
      @columns = get_number_of_columns(line) if @col_separator
    end

    # @return [Boolean] true when both separators were detected
    def valid?
      !(@col_separator.nil? || @row_separator.nil?)
    end

    # @return [String, nil] first candidate separator contained in the line
    def auto_detect_col_sep(line)
      COL_SEPARATORS.detect { |sep| line.include?(sep) }
    end

    # @return [String, nil] first candidate row separator contained in the line
    def auto_detect_row_sep(line)
      ROW_SEPARATORS.detect { |sep| line.include?(sep) }
    end

    # Splits the line on the detected column separator and removes the byte
    # order mark from the first header.
    #
    # @return [Array<String>]
    def get_headers(line)
      headers = line.split(col_separator)
      headers[0] = strip_byte_order_marks(headers[0])
      headers
    end

    # @return [Integer] number of columns in the line
    def get_number_of_columns(line)
      get_headers(line).size
    end

    # Finds the byte order mark the line starts with. Comparison is done on
    # raw bytes so it works regardless of the line's encoding, and longer
    # marks are tried first so UTF-32LE (FF FE 00 00) is not mistaken for
    # UTF-16LE (FF FE).
    #
    # @return [String, nil] the matching key of BYTE_ORDER_MARKS
    def get_byte_order_mark(line)
      raw = line.b
      BYTE_ORDER_MARKS.keys
                      .sort_by { |bom| -bom.bytesize }
                      .detect { |bom| raw.start_with?(bom) }
    end

    # @return [String] encoding name for the mark, 'UTF-8' when there is none
    def get_character_encoding(bom)
      BYTE_ORDER_MARKS[bom] || 'UTF-8'
    end

    # Removes the detected byte order mark from the front of the header.
    # Slicing by bytes keeps the header's own encoding intact.
    def strip_byte_order_marks(header)
      return header unless header && @byte_order_mark
      return header unless header.b.start_with?(@byte_order_mark)

      header.byteslice(@byte_order_mark.bytesize, header.bytesize)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Builds a csv file from csv rows
module CSVUtils
  # Thin writer around a CSV object. When given a file path it opens the file
  # itself and closes it once #generate has finished.
  class CSVReport
    attr_reader :csv,
                :must_close

    # @param csv [String, #<<] path of the file to create, or an object that
    #   accepts rows through #<<
    # @param headers [Array, #csv_headers, nil] optional header row
    # @param csv_options [Hash] options for CSV.open; the :mode key (default
    #   'wb') selects the file mode. The hash is not modified.
    # @yield [report] when a block is given the report is generated immediately
    def initialize(csv, headers = nil, csv_options = {}, &block)
      if csv.is_a?(String)
        # work on a copy so the caller's hash keeps its :mode entry
        options = csv_options.dup
        mode = options.delete(:mode) || 'wb'
        @must_close = true
        # CSV.open takes its options as keywords
        @csv = CSV.open(csv, mode, **options)
      else
        @must_close = false
        @csv = csv
      end

      generate(headers, &block) if block
    end

    # Writes the optional header row, yields the report for rows to be added,
    # then closes the file if this object opened it (also when the block raises).
    #
    # @return [nil]
    def generate(headers = nil)
      add_headers(headers) if headers
      yield self
      nil
    ensure
      @csv.close if @must_close
    end

    # Adds one row. Anything that is not an Array is converted with #to_a.
    def append(csv_row)
      @csv << (csv_row.is_a?(Array) ? csv_row : csv_row.to_a)
    end
    alias << append

    # Adds the header row, taken from the Array itself or from #csv_headers.
    def add_headers(csv_row)
      append(csv_row.is_a?(Array) ? csv_row : csv_row.csv_headers)
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'inheritance-helper'
|
2
|
+
|
3
|
+
module CSVUtils
  # Mixin that lets a class declare which of its values make up a CSV row.
  # Columns are declared with the csv_column class macro; #csv_row / #to_a
  # then return the values in declaration order.
  module CSVRow
    def self.included(base)
      base.extend InheritanceHelper::Methods
      base.extend ClassMethods
    end

    # Class-level declaration API added to the including class.
    module ClassMethods
      # Starting point for the column table; csv_column extends it through
      # add_value_to_class_method.
      def csv_columns
        {}
      end

      # Declares a column.
      #
      # @param header [Symbol, String] column key; also the default header
      #   text and the default method to call
      # @param options [Hash] :header (text), :method (name to send) or
      #   :proc (evaluated on the instance). The hash is not modified.
      # @yield optional block evaluated on the instance to produce the value
      def csv_column(header, options = {}, &block)
        # copy first so a hash shared between declarations is not polluted
        # with this column's :header/:method/:proc
        column_options = options.dup
        column_options[:header] ||= header.to_s

        if block
          column_options[:proc] = block
        elsif column_options[:proc].nil?
          column_options[:method] ||= header
        end

        add_value_to_class_method(:csv_columns, header => column_options)
      end

      # @return [Array] header text of every declared column
      def csv_headers
        csv_columns.values.map { |column_options| csv_column_header(column_options) }
      end

      private

      def csv_column_header(column_options)
        column_options[:header]
      end
    end

    # @return [Array] the value of every declared column for this object
    def csv_row
      self.class.csv_columns.values.map { |column_options| csv_column_value(column_options) }
    end
    alias_method :to_a, :csv_row

    # @return [Array] header text of every declared column
    def csv_headers
      self.class.csv_headers
    end

    private

    # A column value comes from its proc (evaluated on the instance) when one
    # was declared, otherwise from the configured method.
    def csv_column_value(column_options)
      if column_options[:proc]
        instance_eval(&column_options[:proc])
      else
        send(column_options[:method])
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
module CSVUtils
  # Utility class for sorting the rows for a csv file
  #
  # Sorting is done out of core: the source is split into sorted part files
  # of at most batch_size rows, which are then merged pairwise until a single
  # file remains. That file becomes new_csv_file.
  class CSVSort
    attr_reader :csv_file,
                :new_csv_file,
                :has_headers,
                :csv_options,
                :headers

    # @param csv_file [String] path of the file to sort
    # @param new_csv_file [String] path of the sorted output; also the prefix
    #   of the temporary ".part.N" / ".merge.N" files
    # @param has_headers [Boolean] whether the first row is a header row
    # @param csv_options [Hash] options passed to every CSV.open call
    def initialize(csv_file, new_csv_file, has_headers = true, csv_options = {})
      @csv_file = csv_file
      @new_csv_file = new_csv_file
      @has_headers = has_headers
      @csv_options = csv_options
      @csv_part_files = []
    end

    # @param batch_size [Integer] maximum number of rows sorted in memory at once
    # @yield [row1, row2] comparator returning a negative number, zero or a
    #   positive number; defaults to comparing the rows with <=>
    def sort(batch_size = 100_000, &block)
      block ||= ->(row1, row2) { row1 <=> row2 }
      create_sorted_csv_part_files(batch_size, &block)
      merge_csv_part_files(&block)
    end

    private

    # Merges two sorted files into dest_csv_file. The block forms of CSV.open
    # close all three files even if the comparator raises.
    def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
      CSV.open(src_csv_file1, 'rb', **csv_options) do |src1|
        CSV.open(src_csv_file2, 'rb', **csv_options) do |src2|
          CSV.open(dest_csv_file, 'wb', **csv_options) do |dest|
            if @headers
              dest << @headers
              # every part file starts with a copy of the header row
              src1.shift
              src2.shift
            end

            row1 = src1.shift
            row2 = src2.shift

            while row1 || row2
              # take from the first file when the second is exhausted or when
              # its current row sorts first (ties go to the first file)
              if row2.nil? || (row1 && yield(row1, row2) <= 0)
                dest << row1
                row1 = src1.shift
              else
                dest << row2
                row2 = src2.shift
              end
            end
          end
        end
      end
    end

    # Reads the source in batches and writes each batch as a sorted part file.
    def create_sorted_csv_part_files(batch_size, &block)
      CSV.open(csv_file, 'rb', **csv_options) do |src|
        @headers = src.shift if has_headers

        batch = []
        while (row = src.shift)
          batch << row
          next if batch.size < batch_size

          write_sorted_part(batch, &block)
          batch = []
        end

        write_sorted_part(batch, &block) unless batch.empty?
      end
    end

    # Sorts the batch in place and writes it (with headers) to a new part file.
    def write_sorted_part(batch, &block)
      batch.sort!(&block)
      part_file = "#{new_csv_file}.part.#{@csv_part_files.size}"
      @csv_part_files << part_file

      CSV.open(part_file, 'wb', **csv_options) do |csv|
        csv << @headers if @headers
        batch.each { |row| csv << row }
      end
    end

    # Merges part files two at a time until one is left, then moves it into place.
    def merge_csv_part_files(&block)
      # a source without data rows produced no part file; create one holding
      # only the headers so there is something to move into place
      write_sorted_part([], &block) if @csv_part_files.empty?

      file_merge_cnt = 0

      while @csv_part_files.size > 1
        file_merge_cnt += 1

        csv_part_file1 = @csv_part_files.shift
        csv_part_file2 = @csv_part_files.shift
        @csv_part_files << "#{new_csv_file}.merge.#{file_merge_cnt}"

        merge_sort_csv_files(csv_part_file1, csv_part_file2, @csv_part_files.last, &block)

        File.unlink(csv_part_file1)
        File.unlink(csv_part_file2)
      end

      # pop so no stale entry is left behind for a later #sort call
      FileUtils.mv(@csv_part_files.pop, new_csv_file)
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module CSVUtils
  # Transforms a CSV given a series of steps
  #
  # Steps are registered with the chainable methods below and executed, in
  # registration order, on each batch of rows when #process is called. Every
  # step remembers the header row that was current when it was registered.
  class CSVTransformer
    attr_reader :headers

    # @param src_csv [String, #shift] path to read, or an opened CSV-like object
    # @param dest_csv [String, #<<] path to write, or an opened CSV-like object
    # @param csv_options [Hash] options handed to CSVUtils::CSVWrapper for both sides
    def initialize(src_csv, dest_csv, csv_options = {})
      @src_csv = CSVUtils::CSVWrapper.new(src_csv, 'rb', csv_options)
      @dest_csv = CSVUtils::CSVWrapper.new(dest_csv, 'wb', csv_options)
    end

    # Consumes the first source row as the header row.
    def read_headers
      @headers = @src_csv.shift
      self
    end

    # Registers a block called once per batch as block.call(batch, headers);
    # its result is passed as the third argument to the row-level steps.
    def additional_data(&block)
      steps << [:additional_data, @headers, block]
      self
    end

    # Keeps only rows for which the block returns a truthy value.
    def select(&block)
      steps << [:select, @headers, block]
      self
    end

    # Drops rows for which the block returns a truthy value.
    def reject(&block)
      steps << [:reject, @headers, block]
      self
    end

    # Replaces each row with the block's result; new_headers become the
    # header row for the following steps and for the output.
    def map(new_headers, &block)
      steps << [:map, @headers, block]
      @headers = new_headers
      self
    end

    # Appends the block's result (an Array) to each row. Passing nil for
    # additional_headers drops the header row from the output.
    def append(additional_headers, &block)
      steps << [:append, @headers, block]

      if additional_headers
        @headers += additional_headers
      else
        @headers = nil
      end

      self
    end

    # Calls the block for each row without changing the batch.
    def each(&block)
      steps << [:each, @headers, block]
      self
    end

    # Overrides the header row used by later steps and by the output.
    def set_headers(headers)
      @headers = headers
      self
    end

    # Runs all registered steps over the source in batches and writes the
    # surviving rows to the destination. Both files are closed afterwards,
    # also when a step raises.
    #
    # @param batch_size [Integer] number of rows processed together
    # @return [nil]
    def process(batch_size = 10_000, &_block)
      @dest_csv << @headers if @headers

      batch = []
      while (row = @src_csv.shift)
        batch << row
        next if batch.size < batch_size

        flush_batch(batch)
        batch = []
      end

      # final, partially filled batch
      flush_batch(batch) unless batch.empty?
      nil
    ensure
      close
    end

    private

    def steps
      @steps ||= []
    end

    # Closes the destination even if closing the source fails.
    def close
      @src_csv.close
    ensure
      @dest_csv.close
    end

    # Pushes one batch through every step and writes what is left.
    def flush_batch(batch)
      steps.each do |step_type, current_headers, step_block|
        batch = process_step(step_type, current_headers, batch, &step_block)
      end

      batch.each { |row| @dest_csv << row }
    end

    # Applies a single step to the batch and returns the (mutated) batch.
    def process_step(step_type, current_headers, batch, &block)
      case step_type
      when :select
        batch.select! { |row| block.call(row, current_headers, @additional_data) }
      when :reject
        batch.reject! { |row| block.call(row, current_headers, @additional_data) }
      when :map
        batch.map! { |row| block.call(row, current_headers, @additional_data) }
      when :append
        batch.map! { |row| row + block.call(row, current_headers, @additional_data) }
      when :additional_data
        @additional_data = block.call(batch, current_headers)
      when :each
        batch.each { |row| block.call(row, current_headers, @additional_data) }
      end

      # select!/reject! return nil when nothing changed, so return the batch itself
      batch
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module CSVUtils
  # Wraps a CSV object, if wrapper opens the csv file it will close it
  #
  # Given a path the wrapper opens the file and owns the handle; given any
  # other object it merely forwards #<< and #shift to it and #close is a no-op.
  class CSVWrapper
    attr_reader :csv

    # @param csv [String, Object] file path, or an object responding to #<< / #shift
    # @param mode [String] file mode used when csv is a path
    # @param csv_options [Hash] options for CSV.open when csv is a path
    def initialize(csv, mode, csv_options = {})
      open(csv, mode, csv_options)
    end

    # Builds a wrapper. With a block the wrapper is yielded and closed when
    # the block finishes (also when it raises) and the block's value is
    # returned; without a block the wrapper itself is returned.
    def self.open(file, mode, csv_options = {})
      wrapper = new(file, mode, csv_options)
      return wrapper unless block_given?

      begin
        yield wrapper
      ensure
        wrapper.close
      end
    end

    # Opens the file when given a path, otherwise adopts the given object.
    def open(csv, mode, csv_options)
      @close_when_done = csv.is_a?(String)
      # CSV.open takes its options as keywords
      @csv = @close_when_done ? CSV.open(csv, mode, **csv_options) : csv
    end

    # Writes a row to the wrapped object.
    def <<(row)
      csv << row
    end

    # Reads the next row from the wrapped object.
    def shift
      csv.shift
    end

    # Closes the underlying CSV only if this wrapper opened it.
    def close
      csv.close if close_when_done?
    end

    private

    def close_when_done?
      @close_when_done
    end
  end
end
|
data/script/console
ADDED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
12
|
-
dependencies:
|
11
|
+
date: 2020-07-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: inheritance-helper
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
description: Tools for debugging malformed CSV files
|
14
28
|
email: dougyouch@gmail.com
|
15
29
|
executables:
|
@@ -20,16 +34,30 @@ extensions: []
|
|
20
34
|
extra_rdoc_files: []
|
21
35
|
files:
|
22
36
|
- ".gitignore"
|
37
|
+
- ".ruby-gemset"
|
38
|
+
- ".ruby-version"
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
23
41
|
- LICENSE
|
24
42
|
- README.md
|
25
43
|
- bin/csv-change-eol
|
26
44
|
- bin/csv-find-error
|
27
45
|
- bin/csv-readline
|
28
46
|
- csv-utils.gemspec
|
47
|
+
- lib/csv-utils.rb
|
48
|
+
- lib/csv_utils/csv_extender.rb
|
49
|
+
- lib/csv_utils/csv_options.rb
|
50
|
+
- lib/csv_utils/csv_report.rb
|
51
|
+
- lib/csv_utils/csv_row.rb
|
52
|
+
- lib/csv_utils/csv_sort.rb
|
53
|
+
- lib/csv_utils/csv_transformer.rb
|
54
|
+
- lib/csv_utils/csv_wrapper.rb
|
55
|
+
- script/console
|
29
56
|
homepage: https://github.com/dougyouch/csv-utils
|
30
|
-
licenses:
|
57
|
+
licenses:
|
58
|
+
- MIT
|
31
59
|
metadata: {}
|
32
|
-
post_install_message:
|
60
|
+
post_install_message:
|
33
61
|
rdoc_options: []
|
34
62
|
require_paths:
|
35
63
|
- lib
|
@@ -45,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
45
73
|
version: '0'
|
46
74
|
requirements: []
|
47
75
|
rubygems_version: 3.0.8
|
48
|
-
signing_key:
|
76
|
+
signing_key:
|
49
77
|
specification_version: 4
|
50
78
|
summary: CSV Utils
|
51
79
|
test_files: []
|