file_sort 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/lib/file_sort.rb +2 -1
  2. data/lib/merger.rb +17 -20
  3. data/lib/sorter.rb +5 -5
  4. metadata +2 -1
@@ -162,4 +162,5 @@ class FileSort
162
162
  end
163
163
 
164
164
  #Run as
165
- #FileSort.new("large-file-10000000.csv", {parse_as: :int, replace_original: false}).sort!
165
+ #FileSort.new("large-file-1000000.csv", {replace_original: false, lines_per_split: 100000}).sort!
166
+ #FileSort.new("large-file-10000000.csv", {replace_original: false}).sort!
@@ -13,45 +13,42 @@ class Merger
13
13
  f1 = File.open(@filename1)
14
14
  f2 = File.open(@filename2)
15
15
 
16
- f1_line = self.get_line(f1)
17
- f2_line = self.get_line(f2)
16
+ f1_line, f1_col = self.get_line(f1)
17
+ f2_line, f2_col = self.get_line(f2)
18
18
  while !f1_line.nil? and !f2_line.nil?
19
- if f1_line[@sort_column] < f2_line[@sort_column]
20
- self.write_line(outfile, f1_line)
21
- f1_line = self.get_line(f1)
19
+ if f1_col < f2_col
20
+ outfile.print f1_line
21
+ f1_line, f1_col = self.get_line(f1)
22
22
  else
23
- self.write_line(outfile, f2_line)
24
- f2_line = self.get_line(f2)
23
+ outfile.print f2_line
24
+ f2_line, f2_col = self.get_line(f2)
25
25
  end
26
26
  end
27
27
 
28
28
  while !f1_line.nil?
29
- self.write_line(outfile, f1_line)
30
- f1_line = self.get_line(f1)
29
+ outfile.print f1_line
30
+ f1_line, f1_col = self.get_line(f1, false)
31
31
  end
32
32
 
33
33
  while !f2_line.nil?
34
- self.write_line(outfile, f2_line)
35
- f2_line = self.get_line(f2)
34
+ outfile.print f2_line
35
+ f2_line, f2_col = self.get_line(f2, false)
36
36
  end
37
37
 
38
-
39
38
  f1.close
40
39
  f2.close
41
40
  outfile.close
42
41
  end
43
42
 
44
- def get_line(stream)
43
+ def get_line(stream, parse_cols = true)
45
44
  line = stream.gets
46
- return nil if line.nil?
47
- line = line.chomp.split(@column_separator)
48
- line[@sort_column] = line[@sort_column].to_i if @sort_as_int
49
- return line
45
+ return [nil, nil] if line.nil?
46
+ return [line, nil] unless parse_cols
47
+ cols = line.chomp.split(@column_separator, @sort_column + 2)
48
+ cols[@sort_column] = cols[@sort_column].to_i if @sort_as_int
49
+ return [line, cols[@sort_column]]
50
50
  end
51
51
 
52
- def write_line(stream, line)
53
- stream.puts line.join(@column_separator)
54
- end
55
52
  end
56
53
 
57
54
  Merger.new.merge!
@@ -12,14 +12,14 @@ class Sorter
12
12
  lines = []
13
13
  infile = File.open(@input_filename)
14
14
  while line = infile.gets
15
- line = line.chomp.split(@column_separator)
16
- line[@sort_column] = line[@sort_column].to_i if @sort_as_int
17
- lines << line
15
+ col = line.split(@column_separator, @sort_column + 2)[@sort_column]
16
+ col = col.to_i if @sort_as_int
17
+ lines << [col, line]
18
18
  end
19
19
  infile.close
20
- lines.sort!{ |a, b| a[@sort_column] <=> b[@sort_column] }
20
+ lines.sort!{ |a, b| a[0] <=> b[0] }
21
21
  outfile = File.open(@sorted_filename, "w")
22
- lines.each{ |line| outfile.puts(line.join(@column_separator)) }
22
+ lines.each{ |line| outfile.print line[1] }
23
23
  outfile.close
24
24
  end
25
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file_sort
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,6 +12,7 @@ cert_chain: []
12
12
  date: 2013-01-27 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Sorts large files using merge sort on temporary files on the hard drive.
15
+ Also known as an external sort.
15
16
  email: tom.oneill@live.com
16
17
  executables: []
17
18
  extensions: []