csv-utils 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '083044bd714b955ff9d5f6a44cf6d4cb3344cf7f649b105d81a94b3ddb1c9425'
4
- data.tar.gz: 5fa1cd9acacf10c275a23176e36722e189f8e6d8a85e1bfc7faf18eb45a1ca31
3
+ metadata.gz: f85c431ad42ed20382fbe91c3696153be9437a8ee755ede313be2d6f488b3770
4
+ data.tar.gz: b620cfb208a7a28573160103155564875b05990f965f9206ad89bd7cb6b5fcc7
5
5
  SHA512:
6
- metadata.gz: de36fb6c80a68b33c92f3c943a665c419ac1287f5b4b2901c1a60f45964d4b13c2199f53e8dd779b2947d6c86439fa1b1b781cc5ba2225656b2d8b50f690e4cc
7
- data.tar.gz: 0a6b5a9301f2386c2ad5bf3009ca77f949a92510ebb606844a1e6bf9fcc573f524966e68d1d8cec7db9778be3c93fb58a006fee9008e531c53a2c5391af6c0c3
6
+ metadata.gz: 02fe7a0d34f61c54a3788739cc5455dc685cad7b627a9091f2b6e5b3ed0323bf5d162cff53150e89a431c6ab42e76e014b1350c6b47f4aeed66ef727ffe76554
7
+ data.tar.gz: 117d50507b9661c1d70b89df658a97649ea2562604c9dd0764f160e9294586293edbf10e9d968da0107a2f072b0e1814d105c364f91ed2c22cca509fb48f2fd6
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'csv-utils'
5
- s.version = '0.2.1'
5
+ s.version = '0.2.2'
6
6
  s.licenses = ['MIT']
7
7
  s.summary = 'CSV Utils'
8
8
  s.description = 'Tools for debugging malformed CSV files'
@@ -6,4 +6,5 @@ module CSVUtils
6
6
  autoload :CSVOptions, 'csv_utils/csv_options'
7
7
  autoload :CSVReport, 'csv_utils/csv_report'
8
8
  autoload :CSVRow, 'csv_utils/csv_row'
9
+ autoload :CSVSort, 'csv_utils/csv_sort'
9
10
  end
@@ -0,0 +1,112 @@
1
+ require 'fileutils'
2
+
3
module CSVUtils
  # Utility class for sorting the rows of a csv file.
  #
  # The source file is read in batches of at most +batch_size+ rows. Each batch
  # is sorted in memory and written to its own "part" file next to the
  # destination, then the part files are merged two at a time until a single
  # file remains, which is moved to +new_csv_file+.
  #
  # @example Sort by the integer value of the first column
  #   CSVUtils::CSVSort.new('in.csv', 'out.csv').sort { |a, b| a[0].to_i <=> b[0].to_i }
  class CSVSort
    attr_reader :csv_file,
                :new_csv_file,
                :has_headers,
                :csv_options,
                :headers

    # @param csv_file [String] path of the csv file to read
    # @param new_csv_file [String] path the sorted csv file is written to; also
    #   used as the prefix for the temporary part/merge files
    # @param has_headers [Boolean] when true the first row is treated as a header
    #   row: it is not sorted and is written first in every output file
    # @param csv_options [Hash] options forwarded to every CSV.open call
    def initialize(csv_file, new_csv_file, has_headers = true, csv_options = {})
      @csv_file = csv_file
      @new_csv_file = new_csv_file
      @has_headers = has_headers
      @csv_options = csv_options
      @csv_part_files = []
    end

    # Sorts csv_file into new_csv_file.
    #
    # @param batch_size [Integer] maximum number of rows sorted in memory at once
    # @yieldparam row1 [Array] a row
    # @yieldparam row2 [Array] another row
    # @yieldreturn [Integer] negative, zero or positive, like <=>; when no block
    #   is given rows are compared with Array#<=>
    def sort(batch_size = 100_000, &block)
      comparator = block || ->(row1, row2) { row1 <=> row2 }

      # Start from a clean slate so the same instance can be sorted again.
      @csv_part_files = []
      @headers = nil

      create_sorted_csv_part_files(batch_size, &comparator)
      merge_csv_part_files(&comparator)
    ensure
      remove_leftover_part_files
    end

    private

    # Merges two already sorted csv files into dest_csv_file, yielding pairs of
    # rows to the given block to decide which one is written next.
    def merge_sort_csv_files(src_csv_file1, src_csv_file2, dest_csv_file)
      # Block forms close all three files even when the comparator raises.
      CSV.open(src_csv_file1, 'rb', **csv_options) do |src1|
        CSV.open(src_csv_file2, 'rb', **csv_options) do |src2|
          CSV.open(dest_csv_file, 'wb', **csv_options) do |dest|
            if @headers
              dest << @headers
              # Every part file starts with the header row; skip it.
              src1.shift
              src2.shift
            end

            row1 = src1.shift
            row2 = src2.shift

            while row1 || row2
              # Take from src1 when src2 is exhausted or row1 sorts first (ties
              # go to row1); otherwise take from src2.
              if row2.nil? || (row1 && yield(row1, row2) <= 0)
                dest << row1
                row1 = src1.shift
              else
                dest << row2
                row2 = src2.shift
              end
            end
          end
        end
      end
    end

    # Reads csv_file and writes one sorted part file per batch of rows.
    def create_sorted_csv_part_files(batch_size, &block)
      CSV.open(csv_file, 'rb', **csv_options) do |src|
        @headers = src.shift if has_headers

        batch = []
        while (row = src.shift)
          batch << row
          next if batch.size < batch_size

          write_sorted_csv_part_file(batch, &block)
          batch = []
        end

        write_sorted_csv_part_file(batch, &block) unless batch.empty?
      end
    end

    # Sorts batch in place and writes it (preceded by the headers, if any) to a
    # new part file, which is appended to the list of files awaiting merge.
    def write_sorted_csv_part_file(batch, &block)
      batch.sort!(&block)
      part_file = "#{new_csv_file}.part.#{@csv_part_files.size}"
      @csv_part_files << part_file

      CSV.open(part_file, 'wb', **csv_options) do |csv|
        csv << @headers if @headers
        batch.each { |row| csv << row }
      end
    end

    # Merges the part files two at a time until one remains, then moves it to
    # new_csv_file.
    def merge_csv_part_files(&block)
      # No data rows means no part files; still produce the destination file.
      return write_csv_file_without_rows if @csv_part_files.empty?

      file_merge_cnt = 0

      while @csv_part_files.size > 1
        file_merge_cnt += 1

        csv_part_file1, csv_part_file2 = @csv_part_files.first(2)
        @csv_part_files << "#{new_csv_file}.merge.#{file_merge_cnt}"

        merge_sort_csv_files(csv_part_file1, csv_part_file2, @csv_part_files.last, &block)

        File.unlink(csv_part_file1)
        File.unlink(csv_part_file2)
        # Only forget the inputs once they are gone, so a failed merge can
        # still be cleaned up.
        @csv_part_files.shift(2)
      end

      FileUtils.mv(@csv_part_files.last, new_csv_file)
    end

    # Writes new_csv_file containing only the header row (if there is one).
    def write_csv_file_without_rows
      CSV.open(new_csv_file, 'wb', **csv_options) do |csv|
        csv << @headers if @headers
      end
    end

    # Deletes any temporary files still on disk (only the case after a failure)
    # and resets the list.
    def remove_leftover_part_files
      @csv_part_files.each { |file| File.unlink(file) if File.exist?(file) }
      @csv_part_files.clear
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Doug Youch
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-07-04 00:00:00.000000000 Z
11
+ date: 2020-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inheritance-helper
@@ -49,6 +49,7 @@ files:
49
49
  - lib/csv_utils/csv_options.rb
50
50
  - lib/csv_utils/csv_report.rb
51
51
  - lib/csv_utils/csv_row.rb
52
+ - lib/csv_utils/csv_sort.rb
52
53
  - script/console
53
54
  homepage: https://github.com/dougyouch/csv-utils
54
55
  licenses: