file_sort 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/file_sort.rb +2 -1
- data/lib/merger.rb +17 -20
- data/lib/sorter.rb +5 -5
- metadata +2 -1
data/lib/file_sort.rb
CHANGED
@@ -162,4 +162,5 @@ class FileSort
|
|
162
162
|
end
|
163
163
|
|
164
164
|
#Run as
|
165
|
-
#FileSort.new("large-file-
|
165
|
+
#FileSort.new("large-file-1000000.csv", {replace_original: false, lines_per_split: 100000}).sort!
|
166
|
+
#FileSort.new("large-file-10000000.csv", {replace_original: false}).sort!
|
data/lib/merger.rb
CHANGED
@@ -13,45 +13,42 @@ class Merger
|
|
13
13
|
f1 = File.open(@filename1)
|
14
14
|
f2 = File.open(@filename2)
|
15
15
|
|
16
|
-
f1_line = self.get_line(f1)
|
17
|
-
f2_line = self.get_line(f2)
|
16
|
+
f1_line, f1_col = self.get_line(f1)
|
17
|
+
f2_line, f2_col = self.get_line(f2)
|
18
18
|
while !f1_line.nil? and !f2_line.nil?
|
19
|
-
if
|
20
|
-
|
21
|
-
f1_line = self.get_line(f1)
|
19
|
+
if f1_col < f2_col
|
20
|
+
outfile.print f1_line
|
21
|
+
f1_line, f1_col = self.get_line(f1)
|
22
22
|
else
|
23
|
-
|
24
|
-
f2_line = self.get_line(f2)
|
23
|
+
outfile.print f2_line
|
24
|
+
f2_line, f2_col = self.get_line(f2)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
while !f1_line.nil?
|
29
|
-
|
30
|
-
f1_line = self.get_line(f1)
|
29
|
+
outfile.print f1_line
|
30
|
+
f1_line, f1_col = self.get_line(f1, false)
|
31
31
|
end
|
32
32
|
|
33
33
|
while !f2_line.nil?
|
34
|
-
|
35
|
-
f2_line = self.get_line(f2)
|
34
|
+
outfile.print f2_line
|
35
|
+
f2_line, f2_col = self.get_line(f2, false)
|
36
36
|
end
|
37
37
|
|
38
|
-
|
39
38
|
f1.close
|
40
39
|
f2.close
|
41
40
|
outfile.close
|
42
41
|
end
|
43
42
|
|
44
|
-
def get_line(stream)
|
43
|
+
def get_line(stream, parse_cols = true)
|
45
44
|
line = stream.gets
|
46
|
-
return nil if line.nil?
|
47
|
-
line
|
48
|
-
|
49
|
-
|
45
|
+
return [nil, nil] if line.nil?
|
46
|
+
return [line, nil] unless parse_cols
|
47
|
+
cols = line.chomp.split(@column_separator, @sort_column + 2)
|
48
|
+
cols[@sort_column] = cols[@sort_column].to_i if @sort_as_int
|
49
|
+
return [line, cols[@sort_column]]
|
50
50
|
end
|
51
51
|
|
52
|
-
def write_line(stream, line)
|
53
|
-
stream.puts line.join(@column_separator)
|
54
|
-
end
|
55
52
|
end
|
56
53
|
|
57
54
|
Merger.new.merge!
|
data/lib/sorter.rb
CHANGED
@@ -12,14 +12,14 @@ class Sorter
|
|
12
12
|
lines = []
|
13
13
|
infile = File.open(@input_filename)
|
14
14
|
while line = infile.gets
|
15
|
-
|
16
|
-
|
17
|
-
lines << line
|
15
|
+
col = line.split(@column_separator, @sort_column + 2)[@sort_column]
|
16
|
+
col = col.to_i if @sort_as_int
|
17
|
+
lines << [col, line]
|
18
18
|
end
|
19
19
|
infile.close
|
20
|
-
lines.sort!{ |a, b| a[
|
20
|
+
lines.sort!{ |a, b| a[0] <=> b[0] }
|
21
21
|
outfile = File.open(@sorted_filename, "w")
|
22
|
-
lines.each{ |line| outfile.
|
22
|
+
lines.each{ |line| outfile.print line[1] }
|
23
23
|
outfile.close
|
24
24
|
end
|
25
25
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_sort
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,6 +12,7 @@ cert_chain: []
|
|
12
12
|
date: 2013-01-27 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Sorts large files using merge sort on temporary files on the hard drive.
|
15
|
+
Also known as an external sort.
|
15
16
|
email: tom.oneill@live.com
|
16
17
|
executables: []
|
17
18
|
extensions: []
|