offline-sort 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.circleci/config.yml +84 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.rubocop.yml +15 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile +2 -0
- data/Rakefile +3 -1
- data/lib/offline_sort/chunk/input_output/base.rb +9 -10
- data/lib/offline_sort/chunk/input_output/marshal.rb +3 -1
- data/lib/offline_sort/chunk/input_output/message_pack.rb +2 -1
- data/lib/offline_sort/chunk/input_output/yaml.rb +11 -12
- data/lib/offline_sort/chunk.rb +2 -0
- data/lib/offline_sort/fixed_size_min_heap.rb +15 -10
- data/lib/offline_sort/offline_sort.rb +11 -8
- data/lib/offline_sort/version.rb +3 -1
- data/lib/offline_sort.rb +2 -0
- data/offline-sort.gemspec +21 -15
- data/spec/offline_sort/chunk/input_output/base_spec.rb +8 -5
- data/spec/offline_sort/chunk/input_output/marshal_spec.rb +2 -0
- data/spec/offline_sort/chunk/input_output/message_pack_spec.rb +2 -0
- data/spec/offline_sort/chunk/input_output/yaml_spec.rb +2 -0
- data/spec/offline_sort/fixed_size_min_heap_spec.rb +15 -15
- data/spec/offline_sort/{offline_sort_spec.rb → sorter_spec.rb} +11 -12
- data/spec/spec_helper.rb +4 -2
- metadata +48 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a86abc81b2d3fc1653d3c662a62d9e4ff3ec4e0b7e50d82af2d048f042ff35f3
|
4
|
+
data.tar.gz: 5cfb87439f853f775f87c4c9b07147ed833979553ab1da979b083dc138c0d956
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f3454d450a9a1a86ebbe7cb71027257f47713289b4deb72e9fc95f9bae30754a5d08a780a13f50437f5c3f227653102ca6601648533ac7e8888e8e5f7a28b9a
|
7
|
+
data.tar.gz: 683585facddc7e7b3019073e50eb4a42bad6f894982a5e392bfca88f55cb1b2ec41bbbb965a230a544bd01dc0fb81cf871e1527f35a24e8f949242814e11ac00
|
@@ -0,0 +1,84 @@
|
|
1
|
+
version: 2.1
|
2
|
+
jobs:
|
3
|
+
lint:
|
4
|
+
docker:
|
5
|
+
- image: salsify/ruby_ci:2.6.6
|
6
|
+
working_directory: ~/offline-sort
|
7
|
+
steps:
|
8
|
+
- checkout
|
9
|
+
- restore_cache:
|
10
|
+
keys:
|
11
|
+
- v1-gems-ruby-2.6.6-{{ checksum "offline-sort.gemspec" }}-{{ checksum "Gemfile" }}
|
12
|
+
- v1-gems-ruby-2.6.6-
|
13
|
+
- run:
|
14
|
+
name: Install Gems
|
15
|
+
command: |
|
16
|
+
if ! bundle check --path=vendor/bundle; then
|
17
|
+
bundle install --path=vendor/bundle --jobs=4 --retry=3
|
18
|
+
bundle clean
|
19
|
+
fi
|
20
|
+
- save_cache:
|
21
|
+
key: v1-gems-ruby-2.6.6-{{ checksum "offline-sort.gemspec" }}-{{ checksum "Gemfile" }}
|
22
|
+
paths:
|
23
|
+
- "vendor/bundle"
|
24
|
+
- "gemfiles/vendor/bundle"
|
25
|
+
- run:
|
26
|
+
name: Run Rubocop
|
27
|
+
command: bundle exec rubocop
|
28
|
+
test:
|
29
|
+
parameters:
|
30
|
+
gemfile:
|
31
|
+
type: string
|
32
|
+
ruby_version:
|
33
|
+
type: string
|
34
|
+
docker:
|
35
|
+
- image: salsify/ruby_ci:<< parameters.ruby_version >>
|
36
|
+
environment:
|
37
|
+
CIRCLE_TEST_REPORTS: "test-results"
|
38
|
+
BUNDLE_GEMFILE: << parameters.gemfile >>
|
39
|
+
working_directory: ~/offline-sort
|
40
|
+
steps:
|
41
|
+
- checkout
|
42
|
+
- unless:
|
43
|
+
condition:
|
44
|
+
equal: ["gemfiles/rails_edge.gemfile", << parameters.gemfile >>]
|
45
|
+
steps:
|
46
|
+
- restore_cache:
|
47
|
+
keys:
|
48
|
+
- v1-gems-ruby-<< parameters.ruby_version >>-{{ checksum "offline-sort.gemspec" }}-{{ checksum "<< parameters.gemfile >>" }}
|
49
|
+
- v1-gems-ruby-<< parameters.ruby_version >>-
|
50
|
+
- run:
|
51
|
+
name: Install Gems
|
52
|
+
command: |
|
53
|
+
if ! bundle check --path=vendor/bundle; then
|
54
|
+
bundle install --path=vendor/bundle --jobs=4 --retry=3
|
55
|
+
bundle clean
|
56
|
+
fi
|
57
|
+
- unless:
|
58
|
+
condition:
|
59
|
+
equal: [ "gemfiles/rails_edge.gemfile", << parameters.gemfile >> ]
|
60
|
+
steps:
|
61
|
+
- save_cache:
|
62
|
+
key: v1-gems-ruby-<< parameters.ruby_version >>-{{ checksum "offline-sort.gemspec" }}-{{ checksum "<< parameters.gemfile >>" }}
|
63
|
+
paths:
|
64
|
+
- "vendor/bundle"
|
65
|
+
- "gemfiles/vendor/bundle"
|
66
|
+
- run:
|
67
|
+
name: Run Tests
|
68
|
+
command: |
|
69
|
+
bundle exec rspec --format RspecJunitFormatter --out $CIRCLE_TEST_REPORTS/rspec/junit.xml --format progress spec
|
70
|
+
- store_test_results:
|
71
|
+
path: "test-results"
|
72
|
+
workflows:
|
73
|
+
build:
|
74
|
+
jobs:
|
75
|
+
- lint
|
76
|
+
- test:
|
77
|
+
matrix:
|
78
|
+
parameters:
|
79
|
+
gemfile:
|
80
|
+
- "Gemfile"
|
81
|
+
ruby_version:
|
82
|
+
- "2.6.8"
|
83
|
+
- "2.7.4"
|
84
|
+
- "3.0.2"
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
inherit_gem:
|
2
|
+
salsify_rubocop: conf/rubocop.yml
|
3
|
+
|
4
|
+
AllCops:
|
5
|
+
TargetRubyVersion: 2.6
|
6
|
+
Exclude:
|
7
|
+
- 'vendor/**/*'
|
8
|
+
- 'gemfiles/**/*'
|
9
|
+
|
10
|
+
# Offense count: 9
|
11
|
+
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
12
|
+
# AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to
|
13
|
+
Naming/MethodParameterName:
|
14
|
+
Exclude:
|
15
|
+
- 'lib/offline_sort/fixed_size_min_heap.rb'
|
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module OfflineSort
|
2
4
|
module Chunk
|
3
5
|
module InputOutput
|
@@ -12,11 +14,11 @@ module OfflineSort
|
|
12
14
|
end
|
13
15
|
|
14
16
|
def read_entry
|
15
|
-
raise(
|
17
|
+
raise MethodNotImplementedError.new("#{__method__} must be overridden by #{self.class}")
|
16
18
|
end
|
17
19
|
|
18
|
-
def write_entry(
|
19
|
-
raise(
|
20
|
+
def write_entry(_entry)
|
21
|
+
raise MethodNotImplementedError.new("#{__method__} must be overridden by #{self.class}")
|
20
22
|
end
|
21
23
|
|
22
24
|
def write_entries(entries)
|
@@ -38,12 +40,10 @@ module OfflineSort
|
|
38
40
|
|
39
41
|
def each
|
40
42
|
Enumerator.new do |yielder|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
break
|
46
|
-
end
|
43
|
+
loop do
|
44
|
+
yielder.yield(read_entry)
|
45
|
+
rescue EOFError
|
46
|
+
break
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
@@ -52,4 +52,3 @@ module OfflineSort
|
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
55
|
-
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module OfflineSort
|
2
4
|
module Chunk
|
3
5
|
module InputOutput
|
4
6
|
class Marshal < OfflineSort::Chunk::InputOutput::Base
|
5
7
|
def read_entry
|
6
|
-
::Marshal.load(io)
|
8
|
+
::Marshal.load(io) # rubocop:disable Security/MarshalLoad, this is loading from a trusted source
|
7
9
|
end
|
8
10
|
|
9
11
|
def write_entry(entry)
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'yaml'
|
2
4
|
|
3
5
|
module OfflineSort
|
4
6
|
module Chunk
|
5
7
|
module InputOutput
|
6
8
|
class Yaml < OfflineSort::Chunk::InputOutput::Base
|
7
|
-
#The yaml parser does not expose a document enumerator that we can call next on without loading the entire file
|
9
|
+
# The yaml parser does not expose a document enumerator that we can call next on without loading the entire file
|
8
10
|
def read_entry
|
9
|
-
YAML.load(next_document)
|
11
|
+
YAML.load(next_document) # rubocop:disable Security/YAMLLoad, this is loading from a trusted source
|
10
12
|
end
|
11
13
|
|
12
14
|
def write_entry(entry)
|
@@ -18,23 +20,21 @@ module OfflineSort
|
|
18
20
|
def next_document
|
19
21
|
sio = StringIO.new
|
20
22
|
document_count = 0
|
23
|
+
line = nil
|
21
24
|
|
22
|
-
|
25
|
+
loop do
|
23
26
|
line = io.gets
|
24
27
|
|
25
|
-
if line && line.start_with?('---')
|
26
|
-
document_count += 1
|
27
|
-
end
|
28
|
+
document_count += 1 if line && line.start_with?('---')
|
28
29
|
|
29
30
|
sio.write(line)
|
30
|
-
|
31
|
+
break if line.nil? || document_count > 1
|
32
|
+
end
|
31
33
|
|
32
34
|
# reset the io to the beginning of the document
|
33
|
-
if document_count > 1
|
34
|
-
io.seek(io.pos - line.length, IO::SEEK_SET)
|
35
|
-
end
|
35
|
+
io.seek(io.pos - line.length, IO::SEEK_SET) if document_count > 1
|
36
36
|
|
37
|
-
raise EOFError unless sio.size > 0
|
37
|
+
raise EOFError unless sio.size > 0 # rubocop:disable Style/ZeroLengthPredicate
|
38
38
|
|
39
39
|
sio.string
|
40
40
|
end
|
@@ -42,4 +42,3 @@ module OfflineSort
|
|
42
42
|
end
|
43
43
|
end
|
44
44
|
end
|
45
|
-
|
data/lib/offline_sort/chunk.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module OfflineSort
|
2
4
|
class FixedSizeMinHeap
|
3
5
|
attr_accessor :array
|
4
|
-
attr_reader :sort_by
|
5
|
-
attr_reader :size_limit
|
6
|
-
attr_reader :heap_end
|
6
|
+
attr_reader :sort_by, :size_limit, :heap_end
|
7
7
|
|
8
8
|
def initialize(array, &sort_by)
|
9
9
|
@array = array
|
@@ -36,16 +36,17 @@ module OfflineSort
|
|
36
36
|
|
37
37
|
def grow_heap
|
38
38
|
raise "Heap Size (#{size_limit}) Exceeded" if heap_end == (size_limit - 1)
|
39
|
+
|
39
40
|
@heap_end += 1
|
40
41
|
end
|
41
42
|
|
42
43
|
# Compare elements at the supplied indices
|
43
|
-
def compare(i,j)
|
44
|
+
def compare(i, j)
|
44
45
|
(sort_by.call(array[i]) <=> sort_by.call(array[j])) == -1
|
45
46
|
end
|
46
47
|
|
47
48
|
# Swap elements in the array
|
48
|
-
def swap(i,j)
|
49
|
+
def swap(i, j)
|
49
50
|
temp = array[i]
|
50
51
|
array[i] = array[j]
|
51
52
|
array[j] = temp
|
@@ -55,10 +56,12 @@ module OfflineSort
|
|
55
56
|
def parent(i)
|
56
57
|
(i - 1) / 2
|
57
58
|
end
|
59
|
+
|
58
60
|
# Get the node left of node i >= 0
|
59
61
|
def left(i)
|
60
62
|
(2 * i) + 1
|
61
63
|
end
|
64
|
+
|
62
65
|
# Get the node right of node i >= 0
|
63
66
|
def right(i)
|
64
67
|
(2 * i) + 2
|
@@ -67,10 +70,10 @@ module OfflineSort
|
|
67
70
|
# Keeps an heap sorted with the smallest (largest) element on top
|
68
71
|
def heapify(i)
|
69
72
|
l = left(i)
|
70
|
-
top = (
|
73
|
+
top = (l <= heap_end) && compare(l, i) ? l : i
|
71
74
|
|
72
75
|
r = right(i)
|
73
|
-
top = (
|
76
|
+
top = (r <= heap_end) && compare(r, top) ? r : top
|
74
77
|
|
75
78
|
if top != i
|
76
79
|
swap(i, top)
|
@@ -79,9 +82,11 @@ module OfflineSort
|
|
79
82
|
end
|
80
83
|
|
81
84
|
def sift_up(i)
|
82
|
-
if i > 0
|
83
|
-
|
84
|
-
|
85
|
+
if i > 0
|
86
|
+
p = parent(i)
|
87
|
+
|
88
|
+
if p && compare(i, p)
|
89
|
+
swap(i, p)
|
85
90
|
sift_up(p)
|
86
91
|
end
|
87
92
|
end
|
@@ -1,9 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'offline_sort/chunk'
|
2
4
|
require 'offline_sort/fixed_size_min_heap'
|
3
5
|
|
4
6
|
module OfflineSort
|
5
|
-
def self.sort(*args, &sort_by)
|
6
|
-
Sorter.new(*args, &sort_by).sort
|
7
|
+
def self.sort(*args, **kwargs, &sort_by)
|
8
|
+
Sorter.new(*args, **kwargs, &sort_by).sort
|
7
9
|
end
|
8
10
|
|
9
11
|
class Sorter
|
@@ -12,7 +14,10 @@ module OfflineSort
|
|
12
14
|
|
13
15
|
attr_reader :enumerable, :sort_by, :chunk_size, :chunk_input_output_class
|
14
16
|
|
15
|
-
def initialize(enumerable,
|
17
|
+
def initialize(enumerable,
|
18
|
+
chunk_input_output_class: DEFAULT_CHUNK_IO_CLASS,
|
19
|
+
chunk_size: DEFAULT_CHUNK_SIZE,
|
20
|
+
&sort_by)
|
16
21
|
@enumerable = enumerable
|
17
22
|
@chunk_input_output_class = chunk_input_output_class
|
18
23
|
@chunk_size = chunk_size
|
@@ -25,7 +30,7 @@ module OfflineSort
|
|
25
30
|
|
26
31
|
private
|
27
32
|
|
28
|
-
#TODO optimization for when there is less than a single full chunk of data
|
33
|
+
# TODO: optimization for when there is less than a single full chunk of data
|
29
34
|
def merge(sorted_chunk_ios)
|
30
35
|
pq = []
|
31
36
|
chunk_enumerators = sorted_chunk_ios.map(&:each)
|
@@ -39,7 +44,7 @@ module OfflineSort
|
|
39
44
|
pq = FixedSizeMinHeap.new(pq, &entry_sort_by)
|
40
45
|
|
41
46
|
Enumerator.new do |yielder|
|
42
|
-
while item = pq.pop
|
47
|
+
while (item = pq.pop)
|
43
48
|
yielder.yield(item.data)
|
44
49
|
|
45
50
|
begin
|
@@ -65,9 +70,7 @@ module OfflineSort
|
|
65
70
|
end
|
66
71
|
end
|
67
72
|
|
68
|
-
unless chunk_entries.empty?
|
69
|
-
sorted_chunks << write_sorted_chunk(chunk_entries)
|
70
|
-
end
|
73
|
+
sorted_chunks << write_sorted_chunk(chunk_entries) unless chunk_entries.empty?
|
71
74
|
|
72
75
|
sorted_chunks
|
73
76
|
end
|
data/lib/offline_sort/version.rb
CHANGED
data/lib/offline_sort.rb
CHANGED
data/offline-sort.gemspec
CHANGED
@@ -1,25 +1,31 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'offline_sort/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'offline-sort'
|
8
9
|
spec.version = OfflineSort::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
11
|
-
spec.description =
|
12
|
-
spec.summary =
|
13
|
-
spec.homepage =
|
14
|
-
spec.license =
|
10
|
+
spec.authors = ['Matthew Cross']
|
11
|
+
spec.email = ['mcross@salsify.com']
|
12
|
+
spec.description = 'Offline sort for any enumerable with pluggable serialization strategies'
|
13
|
+
spec.summary = 'Offline sort for any enumerable with pluggable serialization strategies'
|
14
|
+
spec.homepage = 'https://github.com/salsify/offline-sort'
|
15
|
+
spec.license = 'MIT'
|
16
|
+
|
17
|
+
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
18
|
+
|
19
|
+
spec.required_ruby_version = '>= 2.6'
|
15
20
|
|
16
|
-
spec.files = `git ls-files`.split($/)
|
17
21
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
22
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
-
spec.require_paths = [
|
23
|
+
spec.require_paths = ['lib']
|
20
24
|
|
21
|
-
spec.add_development_dependency
|
22
|
-
spec.add_development_dependency
|
23
|
-
spec.add_development_dependency
|
24
|
-
spec.add_development_dependency
|
25
|
+
spec.add_development_dependency 'bundler'
|
26
|
+
spec.add_development_dependency 'msgpack'
|
27
|
+
spec.add_development_dependency 'rake'
|
28
|
+
spec.add_development_dependency 'rspec'
|
29
|
+
spec.add_development_dependency 'rspec_junit_formatter'
|
30
|
+
spec.add_development_dependency 'salsify_rubocop', '~> 1.0.1'
|
25
31
|
end
|
@@ -1,16 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
shared_examples "a valid chunk input output" do
|
4
6
|
let(:count) { 1000 }
|
5
7
|
|
6
8
|
let(:arrays) do
|
7
|
-
count
|
9
|
+
Array.new(count) do |index|
|
8
10
|
[SecureRandom.hex, index, SecureRandom.hex]
|
9
11
|
end
|
10
12
|
end
|
11
13
|
|
12
14
|
let(:hashes) do
|
13
|
-
count
|
15
|
+
Array.new(count) do |index|
|
14
16
|
{ 'a' => SecureRandom.hex, 'b' => index, 'c' => SecureRandom.hex }
|
15
17
|
end
|
16
18
|
end
|
@@ -21,7 +23,7 @@ shared_examples "a valid chunk input output" do
|
|
21
23
|
t
|
22
24
|
end
|
23
25
|
|
24
|
-
let(:chunk_class) {
|
26
|
+
let(:chunk_class) {}
|
25
27
|
let(:chunk_io) { chunk_class.new(tempfile) }
|
26
28
|
|
27
29
|
describe "#rewind" do
|
@@ -80,16 +82,17 @@ end
|
|
80
82
|
describe OfflineSort::Chunk::InputOutput::Base do
|
81
83
|
let(:io) { Tempfile.new('chunk') }
|
82
84
|
let(:chunk_io) { OfflineSort::Chunk::InputOutput::Base.new(io) }
|
85
|
+
let(:expected_error_klass) { OfflineSort::Chunk::InputOutput::Base::MethodNotImplementedError }
|
83
86
|
|
84
87
|
describe "#read_entry" do
|
85
88
|
it "raises when read_entry is called" do
|
86
|
-
expect { chunk_io.read_entry }.to raise_error(
|
89
|
+
expect { chunk_io.read_entry }.to raise_error(expected_error_klass)
|
87
90
|
end
|
88
91
|
end
|
89
92
|
|
90
93
|
describe "#write_entry" do
|
91
94
|
it "raises when write_entry is called" do
|
92
|
-
expect { chunk_io.write_entry({}) }.to raise_error(
|
95
|
+
expect { chunk_io.write_entry({}) }.to raise_error(expected_error_klass)
|
93
96
|
end
|
94
97
|
end
|
95
98
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
describe OfflineSort::FixedSizeMinHeap do
|
@@ -6,14 +8,14 @@ describe OfflineSort::FixedSizeMinHeap do
|
|
6
8
|
|
7
9
|
describe "#initialize" do
|
8
10
|
it "is a a heap" do
|
9
|
-
expect{ assert_min_heap(heap.array) }.not_to raise_error
|
11
|
+
expect { assert_min_heap(heap.array) }.not_to raise_error
|
10
12
|
end
|
11
13
|
end
|
12
14
|
|
13
15
|
describe "#push" do
|
14
16
|
context "with a full array" do
|
15
17
|
it "raises an exception" do
|
16
|
-
expect{ heap.push(rand(20)) }.to raise_error("Heap Size (#{array.size}) Exceeded")
|
18
|
+
expect { heap.push(rand(20)) }.to raise_error("Heap Size (#{array.size}) Exceeded")
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
@@ -23,7 +25,7 @@ describe OfflineSort::FixedSizeMinHeap do
|
|
23
25
|
end
|
24
26
|
|
25
27
|
it "adds to the heap" do
|
26
|
-
expect{ heap.push(1) }.not_to raise_error
|
28
|
+
expect { heap.push(1) }.not_to raise_error
|
27
29
|
end
|
28
30
|
end
|
29
31
|
|
@@ -34,7 +36,7 @@ describe OfflineSort::FixedSizeMinHeap do
|
|
34
36
|
|
35
37
|
it "adds to the heap" do
|
36
38
|
5.times do
|
37
|
-
expect{ heap.push(1) }.not_to raise_error
|
39
|
+
expect { heap.push(1) }.not_to raise_error
|
38
40
|
end
|
39
41
|
end
|
40
42
|
end
|
@@ -68,7 +70,7 @@ describe OfflineSort::FixedSizeMinHeap do
|
|
68
70
|
100.times do
|
69
71
|
heap.pop
|
70
72
|
heap.push(rand(100))
|
71
|
-
expect{ assert_min_heap(heap.array) }.not_to raise_error
|
73
|
+
expect { assert_min_heap(heap.array) }.not_to raise_error
|
72
74
|
end
|
73
75
|
end
|
74
76
|
end
|
@@ -78,18 +80,16 @@ describe OfflineSort::FixedSizeMinHeap do
|
|
78
80
|
left = (2 * index) + 1
|
79
81
|
right = (2 * index) + 2
|
80
82
|
|
81
|
-
if left < array.size
|
82
|
-
|
83
|
-
|
84
|
-
raise "not a heap"
|
85
|
-
end
|
83
|
+
if left < array.size && array[left] < e
|
84
|
+
puts "left #{e} #{array}"
|
85
|
+
raise 'not a heap'
|
86
86
|
end
|
87
87
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
88
|
+
next unless right < array.size
|
89
|
+
|
90
|
+
unless array[right] >= e
|
91
|
+
puts "right #{e} #{array}"
|
92
|
+
raise 'not a heap'
|
93
93
|
end
|
94
94
|
end
|
95
95
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
describe OfflineSort::Sorter do
|
@@ -11,11 +13,11 @@ describe OfflineSort::Sorter do
|
|
11
13
|
before do
|
12
14
|
@unsorted = enumerable.dup
|
13
15
|
r = Benchmark.measure do
|
14
|
-
|
16
|
+
result = OfflineSort.sort(enumerable, chunk_size: entries_per_chunk, &sort)
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
18
|
+
@sorted = result.map do |entry|
|
19
|
+
entry
|
20
|
+
end
|
19
21
|
end
|
20
22
|
puts r
|
21
23
|
end
|
@@ -32,9 +34,7 @@ describe OfflineSort::Sorter do
|
|
32
34
|
next
|
33
35
|
end
|
34
36
|
|
35
|
-
unless (
|
36
|
-
raise "Out of order at line #{entry_count}"
|
37
|
-
end
|
37
|
+
raise "Out of order at line #{entry_count}" unless (sort.call(last) <=> sort.call(entry)) == -1
|
38
38
|
|
39
39
|
last = entry
|
40
40
|
entry_count += 1
|
@@ -45,7 +45,7 @@ describe OfflineSort::Sorter do
|
|
45
45
|
end
|
46
46
|
|
47
47
|
let(:arrays) do
|
48
|
-
count
|
48
|
+
Array.new(count) do |index|
|
49
49
|
[SecureRandom.hex, index, SecureRandom.hex]
|
50
50
|
end
|
51
51
|
end
|
@@ -54,7 +54,7 @@ describe OfflineSort::Sorter do
|
|
54
54
|
let(:array_sort) { Proc.new { |arr| arr[array_sort_index] } }
|
55
55
|
|
56
56
|
let(:hashes) do
|
57
|
-
count
|
57
|
+
Array.new(count) do |index|
|
58
58
|
{ 'a' => SecureRandom.hex, 'b' => index, 'c' => SecureRandom.hex }
|
59
59
|
end
|
60
60
|
end
|
@@ -72,7 +72,7 @@ describe OfflineSort::Sorter do
|
|
72
72
|
context "with multiple sort keys" do
|
73
73
|
it_behaves_like "a correct offline sort" do
|
74
74
|
let(:enumerable) do
|
75
|
-
count
|
75
|
+
Array.new(count) do |index|
|
76
76
|
[index.round(-1), index, SecureRandom.hex]
|
77
77
|
end.shuffle
|
78
78
|
end
|
@@ -90,7 +90,7 @@ describe OfflineSort::Sorter do
|
|
90
90
|
context "with multiple sort keys" do
|
91
91
|
it_behaves_like "a correct offline sort" do
|
92
92
|
let(:enumerable) do
|
93
|
-
count
|
93
|
+
Array.new(count) do |index|
|
94
94
|
{ 'a' => index.round(-1), 'b' => index, 'c' => SecureRandom.hex }
|
95
95
|
end.shuffle
|
96
96
|
end
|
@@ -99,4 +99,3 @@ describe OfflineSort::Sorter do
|
|
99
99
|
end
|
100
100
|
end
|
101
101
|
end
|
102
|
-
|
data/spec/spec_helper.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
|
2
4
|
require 'securerandom'
|
3
5
|
require 'benchmark'
|
4
6
|
require 'msgpack'
|
7
|
+
require 'tempfile'
|
5
8
|
|
6
9
|
require 'offline_sort'
|
7
|
-
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: offline-sort
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Cross
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,19 +25,19 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: msgpack
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +66,34 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec_junit_formatter
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: salsify_rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.0.1
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 1.0.1
|
69
97
|
description: Offline sort for any enumerable with pluggable serialization strategies
|
70
98
|
email:
|
71
99
|
- mcross@salsify.com
|
@@ -73,6 +101,11 @@ executables: []
|
|
73
101
|
extensions: []
|
74
102
|
extra_rdoc_files: []
|
75
103
|
files:
|
104
|
+
- ".circleci/config.yml"
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- ".rubocop.yml"
|
108
|
+
- CHANGELOG.md
|
76
109
|
- Gemfile
|
77
110
|
- LICENSE.txt
|
78
111
|
- README.md
|
@@ -92,13 +125,13 @@ files:
|
|
92
125
|
- spec/offline_sort/chunk/input_output/message_pack_spec.rb
|
93
126
|
- spec/offline_sort/chunk/input_output/yaml_spec.rb
|
94
127
|
- spec/offline_sort/fixed_size_min_heap_spec.rb
|
95
|
-
- spec/offline_sort/
|
128
|
+
- spec/offline_sort/sorter_spec.rb
|
96
129
|
- spec/spec_helper.rb
|
97
130
|
homepage: https://github.com/salsify/offline-sort
|
98
131
|
licenses:
|
99
132
|
- MIT
|
100
133
|
metadata: {}
|
101
|
-
post_install_message:
|
134
|
+
post_install_message:
|
102
135
|
rdoc_options: []
|
103
136
|
require_paths:
|
104
137
|
- lib
|
@@ -106,16 +139,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
106
139
|
requirements:
|
107
140
|
- - ">="
|
108
141
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
142
|
+
version: '2.6'
|
110
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
144
|
requirements:
|
112
145
|
- - ">="
|
113
146
|
- !ruby/object:Gem::Version
|
114
147
|
version: '0'
|
115
148
|
requirements: []
|
116
|
-
|
117
|
-
|
118
|
-
signing_key:
|
149
|
+
rubygems_version: 3.1.4
|
150
|
+
signing_key:
|
119
151
|
specification_version: 4
|
120
152
|
summary: Offline sort for any enumerable with pluggable serialization strategies
|
121
153
|
test_files:
|
@@ -124,5 +156,5 @@ test_files:
|
|
124
156
|
- spec/offline_sort/chunk/input_output/message_pack_spec.rb
|
125
157
|
- spec/offline_sort/chunk/input_output/yaml_spec.rb
|
126
158
|
- spec/offline_sort/fixed_size_min_heap_spec.rb
|
127
|
-
- spec/offline_sort/
|
159
|
+
- spec/offline_sort/sorter_spec.rb
|
128
160
|
- spec/spec_helper.rb
|