diff_set 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: deaf2800e3064f34e2e8b253ddec8cae53d29951
4
+ data.tar.gz: d61484cba1501f4f453996829e2347698b28b77a
5
+ SHA512:
6
+ metadata.gz: 74d052ee5da4650f47ab40a1d491cc14678597b76a4110368daf09b85680de9cf075077a7d16c86e46fe359d0c14797b8f2ff2e8313c6d8b0c2429384992f360
7
+ data.tar.gz: 80e99fa750b79cae19630c94bb337425b62a2bd2b3f2452a0b7359ce2db5bb0c96574b23141178e079c2eb3b3995815b4012aa23ad06b0ffdde96a71828fa6f2
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
19
+ .rvmrc
20
+ .ruby-version
21
+ *.o
22
+ Makefile
23
+ *.bundle
24
+ *.so
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Michael Parrish
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # DiffSet
2
+
3
+ DiffSet contains a collection of data structures optimized to perform partial set subtractions.
4
+
5
+ - `DiffSet::RandomSet` Produces a randomized set difference
6
+
7
+ - `DiffSet::PrioritySet` Produces an ordered set difference
8
+
9
+ - `DiffSet::PairwiseRandomSet` Presents a random set difference as a list of pairs
10
+
11
+ - `DiffSet::PairwisePrioritySet` Presents an ordered set difference as a list of pairs
12
+
13
+ ## Installation
14
+
15
+ 1. Install [Boost](http://www.boost.org/):
16
+
17
+ - OS X: `brew update && brew install boost`
18
+
19
+ - Ubuntu: `sudo apt-get update && sudo apt-get install libboost-all-dev`
20
+
21
+ 2. Add this line to your application's Gemfile: `gem 'diff_set'`
22
+
23
+ 3. And then execute: `bundle`
24
+
25
+ To install the `rice` dependency, **Ruby must be compiled with shared libraries enabled**:
26
+
27
+ - rvm: `rvm reinstall [version] -- --enable-shared`
28
+
29
+ - rbenv: `CONFIGURE_OPTS="--enable-shared" rbenv install [version]`
30
+
31
+
32
+ ## Usage
33
+
34
+ The API is pretty straightforward, and [the specs](https://github.com/parrish/diff_set/tree/master/spec) have examples.
35
+
36
+ ## Testing
37
+
38
+ Run the specs with `rake`
39
+
40
+ ## Contributing
41
+
42
+ 1. Fork it ( http://github.com/parrish/diff_set/fork )
43
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
44
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
45
+ 4. Push to the branch (`git push origin my-new-feature`)
46
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,20 @@
require 'bundler/gem_tasks'
require 'rake/extensiontask'
require 'rspec/core/rake_task'
require 'rubygems/package_task'

# Gem specification shared by the packaging and extension tasks below.
GEMSPEC = Gem::Specification.load('diff_set.gemspec')

# Packaging task: also produce zip and tar archives of the gem contents.
Gem::PackageTask.new(GEMSPEC) do |package|
  package.need_zip = true
  package.need_tar = true
end

# Native extension task: sources live in ext/diff_set, the compiled library
# is placed in lib/diff_set.
Rake::ExtensionTask.new('diff_set_ext', GEMSPEC) do |extension|
  extension.ext_dir = 'ext/diff_set'
  extension.lib_dir = 'lib/diff_set'
  extension.source_pattern = '*.{h,cpp}'
end

RSpec::Core::RakeTask.new(:spec)

# Plain `rake` compiles the extension first, then runs the specs.
task default: [:compile, :spec]
data/diff_set.gemspec ADDED
@@ -0,0 +1,28 @@
# coding: utf-8
# Make lib/ loadable so the version constant can be read while building.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'diff_set/version'

Gem::Specification.new do |gem|
  gem.name          = 'diff_set'
  gem.version       = DiffSet::VERSION
  gem.authors       = ['Michael Parrish']
  gem.email         = ['michael@zooniverse.org']
  gem.summary       = 'DiffSet contains a collection of data structures optimized to perform partial set subtractions'
  gem.description   = ''
  gem.homepage      = 'https://github.com/parrish/diff_set'
  gem.license       = 'MIT'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']
  gem.extensions    = ['ext/diff_set/extconf.rb']

  gem.add_development_dependency 'bundler', '~> 1.5'
  gem.add_development_dependency 'rake'
  gem.add_development_dependency 'rake-compiler', '~> 0.9.2'
  gem.add_development_dependency 'rspec'
  gem.add_development_dependency 'pry'
  gem.add_runtime_dependency 'rice', '~> 1.6'
end
@@ -0,0 +1,39 @@
1
+ #include "rice/Class.hpp"
2
+ #include "rice/Module.hpp"
3
+ #include "rice/ruby_try_catch.hpp"
4
+ #include "rice/Data_Type.hpp"
5
+ #include "rice/Constructor.hpp"
6
+ using namespace Rice;
7
+
8
+ #include "random_set.h"
9
+ #include "priority_set.h"
10
+
11
+ extern "C"
12
+ void Init_diff_set_ext() {
13
+ RUBY_TRY
14
+ {
15
+ Module rb_mDiffSet = define_module("DiffSet");
16
+
17
+ Data_Type<RandomSet> rb_cRandomSet = define_class_under<RandomSet>(rb_mDiffSet, "RandomSet")
18
+ .define_constructor(Constructor<RandomSet>())
19
+ .define_method("add", &RandomSet::add, (Arg("id"), Arg("priority") = 0.0))
20
+ .define_method("remove", &RandomSet::remove)
21
+ .define_method("sample", &RandomSet::sample)
22
+ .define_method("subtract", &RandomSet::subtract)
23
+ .define_method("include?", &RandomSet::includes)
24
+ .define_method("to_a", &RandomSet::to_a)
25
+ .define_method("size", &RandomSet::size);
26
+
27
+ Data_Type<PrioritySet> rb_cPrioritySet = define_class_under<PrioritySet>(rb_mDiffSet, "PrioritySet")
28
+ .define_constructor(Constructor<PrioritySet>())
29
+ .define_method("add", &PrioritySet::add, (Arg("id"), Arg("priority") = 0.0))
30
+ .define_method("remove", &PrioritySet::remove)
31
+ .define_method("sample", &PrioritySet::sample)
32
+ .define_method("subtract", &PrioritySet::subtract)
33
+ .define_method("include?", &PrioritySet::includes)
34
+ .define_method("to_a", &PrioritySet::to_a)
35
+ .define_method("to_h", &PrioritySet::to_h)
36
+ .define_method("size", &PrioritySet::size);
37
+ }
38
+ RUBY_CATCH
39
+ }
@@ -0,0 +1,4 @@
require 'mkmf-rice'

# Generate the Makefile for the native extension. The target name matches the
# path the library requires at runtime ('diff_set/diff_set_ext').
create_makefile 'diff_set/diff_set_ext'
@@ -0,0 +1,99 @@
1
+ #include "priority_set.h"
2
+
3
+ PrioritySet::PrioritySet() {
4
+ timeval time;
5
+ gettimeofday(&time, NULL);
6
+ long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
7
+ this->rng.seed((uint)millis);
8
+ }
9
+
10
+ void PrioritySet::add(int id, double priority) {
11
+ if(includes(id)) {
12
+ element_handle handle = this->element_handles[id];
13
+ (*handle).priority = priority;
14
+ this->heap.update(handle);
15
+ return;
16
+ }
17
+
18
+ static boost::uniform_01<boost::random::mt19937> dist(this->rng);
19
+ element_handle handle = this->heap.push(element(id, priority, dist()));
20
+ this->element_handles.insert(std::make_pair<int, element_handle>(id, handle));
21
+ this->element_set.insert(id);
22
+ }
23
+
24
+ void PrioritySet::remove(int id) {
25
+ boost::unordered_set<int>::iterator set_it = this->element_set.find(id);
26
+
27
+ if(set_it != this->element_set.end()) {
28
+ this->element_set.erase(set_it);
29
+ element_handle handle = this->element_handles[id];
30
+ (*handle).priority = std::numeric_limits<int>::min();
31
+ (*handle).enabled = false;
32
+ this->heap.decrease(handle);
33
+ }
34
+ }
35
+
36
+ Array PrioritySet::sample(int limit) {
37
+ Array sampled;
38
+ fibonacci_heap::ordered_iterator it;
39
+
40
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
41
+ if(it->enabled) {
42
+ sampled.push(it->id);
43
+ if(sampled.size() >= (size_t)limit) {
44
+ break;
45
+ }
46
+ }
47
+ }
48
+
49
+ return sampled;
50
+ }
51
+
52
+ bool PrioritySet::includes(int id) {
53
+ boost::unordered_set<int>::const_iterator it;
54
+ it = this->element_set.find(id);
55
+ return it != this->element_set.end();
56
+ }
57
+
58
+ Array PrioritySet::subtract(RandomSet &other, size_t limit) {
59
+ Array diff;
60
+ fibonacci_heap::ordered_iterator it;
61
+ boost::unordered_set<int>::const_iterator in_other;
62
+
63
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
64
+ if(it->enabled && other.element_set.find(it->id) == other.element_set.end()) {
65
+ diff.push(it->id);
66
+ if(diff.size() >= limit) {
67
+ break;
68
+ }
69
+ }
70
+ }
71
+
72
+ return diff;
73
+ }
74
+
75
+ Array PrioritySet::to_a() {
76
+ Array array;
77
+ fibonacci_heap::ordered_iterator it;
78
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
79
+ if(it->enabled) {
80
+ array.push(it->id);
81
+ }
82
+ }
83
+ return array;
84
+ }
85
+
86
+ Hash PrioritySet::to_h() {
87
+ Hash hash;
88
+ fibonacci_heap::ordered_iterator it;
89
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
90
+ if(it->enabled) {
91
+ hash[it->id] = it->priority;
92
+ }
93
+ }
94
+ return hash;
95
+ }
96
+
97
+ size_t PrioritySet::size() {
98
+ return this->element_set.size();
99
+ }
@@ -0,0 +1,59 @@
1
+ #ifndef PRIORITY_SET_H
2
+ #define PRIORITY_SET_H
3
+
4
+ #include "rice/Object.hpp"
5
+ #include "rice/Array.hpp"
6
+ #include "rice/Hash.hpp"
7
+ using namespace Rice;
8
+
9
+ #include <boost/random.hpp>
10
+ #include <boost/heap/fibonacci_heap.hpp>
11
+ #include <boost/unordered_set.hpp>
12
+ #include <boost/unordered_map.hpp>
13
+
14
+ #include <sys/time.h>
15
+ #include <limits>
16
+
17
+ #include "random_set.h"
18
+
19
+ class PrioritySet {
20
+ public:
21
+ struct element {
22
+ int id;
23
+ double priority;
24
+ double random;
25
+ bool enabled;
26
+
27
+ element(int id, double priority, double random) {
28
+ enabled = true;
29
+ this->id = id;
30
+ this->priority = priority;
31
+ this->random = random;
32
+ }
33
+ };
34
+
35
+ struct comparator {
36
+ bool operator()(const element &a, const element &b) const {
37
+ return (a.priority < b.priority) || (a.priority == b.priority && a.random < b.random);
38
+ }
39
+ };
40
+
41
+ PrioritySet();
42
+ void add(int id, double priority = 0.0);
43
+ void remove(int id);
44
+ Array sample(int limit);
45
+ bool includes(int id);
46
+ Array subtract(RandomSet &other, size_t limit);
47
+ Array to_a();
48
+ Hash to_h();
49
+ size_t size();
50
+ protected:
51
+ typedef boost::heap::fibonacci_heap<element, boost::heap::compare<comparator> > fibonacci_heap;
52
+ typedef fibonacci_heap::handle_type element_handle;
53
+ fibonacci_heap heap;
54
+ boost::unordered_map<int, element_handle > element_handles;
55
+ boost::unordered_set<int> element_set;
56
+ boost::random::mt19937 rng;
57
+ };
58
+
59
+ #endif
@@ -0,0 +1,108 @@
1
+ #include "random_set.h"
2
+
3
+ RandomSet::RandomSet() {
4
+ timeval time;
5
+ gettimeofday(&time, NULL);
6
+ long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
7
+ this->rng.seed((uint)millis);
8
+ }
9
+
10
+ void RandomSet::add(int element, double priority) {
11
+ this->element_set.insert(element);
12
+ this->elements.push_back(element);
13
+ }
14
+
15
+ void RandomSet::remove(int element) {
16
+ boost::unordered_set<int>::iterator set_it = this->element_set.find(element);
17
+
18
+ if(set_it != this->element_set.end()) {
19
+ this->element_set.erase(set_it);
20
+
21
+ std::vector<int>::iterator it = iterator_to(element);
22
+ if(it != this->elements.end()) {
23
+ this->elements.erase(it);
24
+ }
25
+ }
26
+ }
27
+
28
+ Array RandomSet::sample(int limit) {
29
+ Array sampled;
30
+ int swapIndex;
31
+ int tmp;
32
+ int upper_bound = (int)this->elements.size() - 1;
33
+ boost::random::uniform_int_distribution<> dist;
34
+
35
+ for(int i = 0; i < limit && i < (int)this->elements.size(); i++) {
36
+ dist = boost::random::uniform_int_distribution<>(std::min(i + 1, upper_bound), upper_bound);
37
+ swapIndex = dist(rng);
38
+ tmp = this->elements[i];
39
+ this->elements[i] = this->elements[swapIndex];
40
+ this->elements[swapIndex] = tmp;
41
+ sampled.push(this->elements[i]);
42
+ }
43
+
44
+ return sampled;
45
+ }
46
+
47
+ bool RandomSet::includes(int element) {
48
+ boost::unordered_set<int>::const_iterator it;
49
+ it = this->element_set.find(element);
50
+ return it != this->element_set.end();
51
+ }
52
+
53
+ std::vector<int>::iterator RandomSet::iterator_to(int element) {
54
+ std::vector<int>::iterator it;
55
+ for(it = this->elements.begin(); it != this->elements.end(); it++) {
56
+ if(element == *it) {
57
+ return it;
58
+ }
59
+ }
60
+
61
+ return this->elements.end();
62
+ }
63
+
64
+ Array RandomSet::subtract(RandomSet &other, size_t limit) {
65
+ Array diff;
66
+ int element;
67
+ int swapIndex;
68
+ int tmp;
69
+ int upper_bound = (int)this->elements.size() - 1;
70
+
71
+ boost::unordered_set<int>::const_iterator in_other;
72
+ boost::random::uniform_int_distribution<> dist;
73
+
74
+ for(int i = 0; i < (int)this->elements.size(); i++) {
75
+ dist = boost::random::uniform_int_distribution<>(std::min(i + 1, upper_bound), upper_bound);
76
+ swapIndex = dist(rng);
77
+ tmp = this->elements[i];
78
+ this->elements[i] = this->elements[swapIndex];
79
+ this->elements[swapIndex] = tmp;
80
+
81
+ element = this->elements[i];
82
+ in_other = other.element_set.find(element);
83
+
84
+ if(in_other == other.element_set.end()) {
85
+ diff.push(element);
86
+ if(diff.size() >= limit) {
87
+ break;
88
+ }
89
+ }
90
+ }
91
+
92
+ return diff;
93
+ }
94
+
95
+ Array RandomSet::to_a() {
96
+ Array array;
97
+
98
+ std::vector<int>::iterator it;
99
+ for(it = this->elements.begin(); it != this->elements.end(); it++) {
100
+ array.push(*it);
101
+ }
102
+
103
+ return array;
104
+ }
105
+
106
+ size_t RandomSet::size() {
107
+ return this->elements.size();
108
+ }
@@ -0,0 +1,33 @@
1
+ #ifndef RANDOM_SET_H
2
+ #define RANDOM_SET_H
3
+
4
+ #include "rice/Object.hpp"
5
+ #include "rice/Array.hpp"
6
+ using namespace Rice;
7
+
8
+ #include <boost/random/mersenne_twister.hpp>
9
+ #include <boost/random/uniform_int_distribution.hpp>
10
+ #include <boost/unordered_set.hpp>
11
+
12
+ #include <vector>
13
+ #include <sys/time.h>
14
+
15
+ class RandomSet {
16
+ public:
17
+ RandomSet();
18
+ void add(int element, double priority = 0.0);
19
+ void remove(int element);
20
+ Array sample(int limit);
21
+ bool includes(int element);
22
+ Array subtract(RandomSet &other, size_t limit);
23
+ Array to_a();
24
+ size_t size();
25
+ boost::unordered_set<int> element_set;
26
+ protected:
27
+ std::vector<int> elements;
28
+ boost::random::mt19937 rng;
29
+
30
+ std::vector<int>::iterator iterator_to(int element);
31
+ };
32
+
33
+ #endif
@@ -0,0 +1,23 @@
module DiffSet
  # Mixin that makes +subtract+ and +sample+ return their results grouped
  # into two-element arrays.
  #
  # When included, the host class's existing +subtract+ and +sample+ are kept
  # under the names +_c_subtract+ and +_c_sample+, and wrappers are defined
  # that request twice as many elements and slice them into pairs. A trailing
  # element without a partner is dropped.
  module Pairwise
    def self.included(klass)
      # Wrap only once. Running the hook again (re-include, or include in a
      # subclass of an already pairwise class) would alias the wrapper onto
      # itself and recurse until the stack overflows.
      return if klass.method_defined?(:_c_subtract) || klass.private_method_defined?(:_c_subtract)

      klass.class_eval do
        alias_method :_c_subtract, :subtract
        def subtract(set, limit)
          _in_pairs _c_subtract(set, 2 * limit)
        end

        alias_method :_c_sample, :sample
        def sample(limit)
          _in_pairs _c_sample(2 * limit)
        end
      end
    end

    protected

    # Groups +list+ into consecutive pairs, discarding an unpaired last item.
    def _in_pairs(list)
      list.each_slice(2).select { |pair| pair.length == 2 }
    end
  end
end
@@ -0,0 +1,5 @@
module DiffSet
  # PrioritySet with the Pairwise mixin applied.
  class PairwisePrioritySet < PrioritySet
    include DiffSet::Pairwise
  end
end
@@ -0,0 +1,5 @@
module DiffSet
  # RandomSet with the Pairwise mixin applied.
  class PairwiseRandomSet < RandomSet
    include DiffSet::Pairwise
  end
end
@@ -0,0 +1,3 @@
module DiffSet
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = '0.0.2'.freeze
end
data/lib/diff_set.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'diff_set/version'
2
+ require 'diff_set/diff_set_ext'
3
+
4
+ require 'diff_set/pairwise'
5
+ require 'diff_set/pairwise_random_set'
6
+ require 'diff_set/pairwise_priority_set'
7
+
8
+ module DiffSet
9
+
10
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffSet
4
+ describe PairwisePrioritySet do
5
+ it_behaves_like 'a set'
6
+
7
+ let(:set) do
8
+ ids = (1..5).to_a
9
+ priorities = ids.reverse
10
+
11
+ PairwisePrioritySet.new.tap do |priority_set|
12
+ ids.zip(priorities).each{ |id, priority| priority_set.add id, priority }
13
+ end
14
+ end
15
+
16
+ let(:other_set) do
17
+ RandomSet.new.tap do |random_set|
18
+ 1.upto(3).each{ |i| random_set.add i }
19
+ end
20
+ end
21
+
22
+ it 'should sample elements in order' do
23
+ set.sample(2).should == [[1, 2], [3, 4]]
24
+ set.sample(3).length.should == 2
25
+ end
26
+
27
+ it 'should not include removed elements in subtractions' do
28
+ set.add 6, 0
29
+ set.subtract(other_set, 5).flatten.should == [4, 5]
30
+ set.remove 5
31
+ set.subtract(other_set, 5).flatten.should == [4, 6]
32
+ end
33
+
34
+ it 'should subtract another set' do
35
+ set.subtract(other_set, 5).length.should == 1
36
+ set.subtract(other_set, 5).first.should == [4, 5]
37
+ end
38
+
39
+ it 'should always return pairs' do
40
+ 6.upto(8).each{ |i| set.add i, rand }
41
+ set.subtract(other_set, 3).each{ |pair| pair.length.should == 2 }
42
+ end
43
+
44
+ it 'should update the priority' do
45
+ set.to_a.first.should == 1
46
+ set.to_h[1].should be_within(0.1).of(5)
47
+ set.add 1, 0
48
+ set.to_a.first.should == 2
49
+ set.to_a.last.should == 1
50
+ set.to_h[1].should be_within(0.1).of(0)
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffSet
4
+ describe PairwiseRandomSet do
5
+ it_behaves_like 'a set'
6
+
7
+ def create_set(elements)
8
+ PairwiseRandomSet.new.tap do |random_set|
9
+ 1.upto(elements).each{ |i| random_set.add i }
10
+ end
11
+ end
12
+
13
+ let(:set){ create_set(5) }
14
+ let(:other_set){ create_set(3) }
15
+
16
+ it 'should sample pairs of elements' do
17
+ set.sample(2).collect(&:length).should == [2, 2]
18
+ set.sample(3).length.should == 2
19
+ end
20
+
21
+ it 'should not include removed elements in subtractions' do
22
+ set.remove 5
23
+ set.subtract(other_set, 5).flatten.should_not include 5
24
+ end
25
+
26
+ it 'should subtract another set' do
27
+ set.subtract(other_set, 5).length.should == 1
28
+ set.subtract(other_set, 5).first.should =~ [4, 5]
29
+ end
30
+
31
+ it 'should always return pairs' do
32
+ 6.upto(8).each{ |i| set.add i }
33
+ set.subtract(other_set, 3).each{ |pair| pair.length.should == 2 }
34
+ end
35
+
36
+ it 'should mutate the order of the elements when sampling' do
37
+ set_before = set.to_a
38
+ set.sample 5
39
+ set_before.should =~ set.to_a
40
+ set_before.should_not == set.to_a
41
+ end
42
+
43
+ it 'should mutate the order of the elements on a subtraction' do
44
+ set_before = set.to_a
45
+ set.subtract other_set, 5
46
+ set_before.should =~ set.to_a
47
+ set_before.should_not == set.to_a
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffSet
4
+ describe PrioritySet do
5
+ it_behaves_like 'a set'
6
+
7
+ let(:set) do
8
+ ids = (1..5).to_a
9
+ priorities = ids.reverse
10
+
11
+ PrioritySet.new.tap do |priority_set|
12
+ ids.zip(priorities).each{ |id, priority| priority_set.add id, priority }
13
+ end
14
+ end
15
+
16
+ let(:other_set) do
17
+ RandomSet.new.tap do |random_set|
18
+ 1.upto(3).each{ |i| random_set.add i }
19
+ end
20
+ end
21
+
22
+ it 'should sample elements in order' do
23
+ set.sample(5).should == (1..5).to_a
24
+ end
25
+
26
+ it 'should subtract another set' do
27
+ set.subtract(other_set, 5).should == [4, 5]
28
+ set.subtract(other_set, 1).first.should == 4
29
+ end
30
+
31
+ it 'should not include removed elements in subtractions' do
32
+ set.remove 5
33
+ set.subtract(other_set, 5).should == [4]
34
+ end
35
+
36
+ it 'should update the priority' do
37
+ set.to_a.first.should == 1
38
+ set.to_h[1].should be_within(0.1).of(5)
39
+ set.add 1, 0
40
+ set.to_a.first.should == 2
41
+ set.to_a.last.should == 1
42
+ set.to_h[1].should be_within(0.1).of(0)
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffSet
4
+ describe RandomSet do
5
+ it_behaves_like 'a set'
6
+
7
+ def create_set(elements)
8
+ RandomSet.new.tap do |random_set|
9
+ 1.upto(elements).each{ |i| random_set.add i }
10
+ end
11
+ end
12
+
13
+ let(:set){ create_set(5) }
14
+ let(:other_set){ create_set(3) }
15
+
16
+ it 'should subtract another set' do
17
+ set.subtract(other_set, 5).should =~ [4, 5]
18
+ [4, 5].should include set.subtract(other_set, 1).first
19
+ end
20
+
21
+ it 'should not include removed elements in subtractions' do
22
+ set.remove 5
23
+ set.subtract(other_set, 5).should == [4]
24
+ end
25
+
26
+ it 'should mutate the order of the elements when sampling' do
27
+ set_before = set.to_a
28
+ set.sample 5
29
+ set_before.should =~ set.to_a
30
+ set_before.should_not == set.to_a
31
+ end
32
+
33
+ it 'should mutate the order of the elements on a subtraction' do
34
+ set_before = set.to_a
35
+ set.subtract other_set, 5
36
+ set_before.should =~ set.to_a
37
+ set_before.should_not == set.to_a
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,17 @@
1
+ root = File.expand_path File.join(File.dirname(__FILE__), '../')
2
+ %w(lib ext).each do |name|
3
+ dir = File.join root, name
4
+ $LOAD_PATH.unshift dir unless $LOAD_PATH.include? dir
5
+ end
6
+
7
+ require 'pry'
8
+ require 'diff_set'
9
+
10
+ Dir["./spec/support/**/*.rb"].sort.each{ |f| require f }
11
+
12
+ RSpec.configure do |config|
13
+ config.treat_symbols_as_metadata_keys_with_true_values = true
14
+ config.run_all_when_everything_filtered = true
15
+ config.filter_run :focus
16
+ config.order = 'random'
17
+ end
@@ -0,0 +1,34 @@
1
+ shared_examples_for 'a set' do
2
+ it 'should convert to an Array' do
3
+ set.to_a.should =~ (1..5).to_a
4
+ end
5
+
6
+ it 'should add elements' do
7
+ set.add 100
8
+ set.to_a.should include 100
9
+ end
10
+
11
+ it 'should remove elements' do
12
+ set.remove 1
13
+ set.to_a.should_not include 1
14
+ end
15
+
16
+ it 'should sample elements' do
17
+ set.sample(2).length.should == 2
18
+ end
19
+
20
+ it 'should not include removed elements in samples' do
21
+ set.remove 5
22
+ set.sample(5).should_not include 5
23
+ end
24
+
25
+ it 'should know how many elements it contains' do
26
+ expect{ set.add 100 }.to change{ set.size }.from(5).to 6
27
+ end
28
+
29
+ it 'should know if it contains an element' do
30
+ set.should_not include 100
31
+ set.add 100
32
+ set.should include 100
33
+ end
34
+ end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diff_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Michael Parrish
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.9.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rice
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.6'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.6'
97
+ description: ''
98
+ email:
99
+ - michael@zooniverse.org
100
+ executables: []
101
+ extensions:
102
+ - ext/diff_set/extconf.rb
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rspec"
107
+ - Gemfile
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - diff_set.gemspec
112
+ - ext/diff_set/diff_set_ext.cpp
113
+ - ext/diff_set/extconf.rb
114
+ - ext/diff_set/priority_set.cpp
115
+ - ext/diff_set/priority_set.h
116
+ - ext/diff_set/random_set.cpp
117
+ - ext/diff_set/random_set.h
118
+ - lib/diff_set.rb
119
+ - lib/diff_set/pairwise.rb
120
+ - lib/diff_set/pairwise_priority_set.rb
121
+ - lib/diff_set/pairwise_random_set.rb
122
+ - lib/diff_set/version.rb
123
+ - spec/pairwise_priority_set_spec.rb
124
+ - spec/pairwise_random_set_spec.rb
125
+ - spec/priority_set_spec.rb
126
+ - spec/random_set_spec.rb
127
+ - spec/spec_helper.rb
128
+ - spec/support/shared_examples_for_set.rb
129
+ homepage: https://github.com/parrish/diff_set
130
+ licenses:
131
+ - MIT
132
+ metadata: {}
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ required_rubygems_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubyforge_project:
149
+ rubygems_version: 2.2.2
150
+ signing_key:
151
+ specification_version: 4
152
+ summary: DiffSet contains a collection of data structures optimized to perform partial
153
+ set subtractions
154
+ test_files:
155
+ - spec/pairwise_priority_set_spec.rb
156
+ - spec/pairwise_random_set_spec.rb
157
+ - spec/priority_set_spec.rb
158
+ - spec/random_set_spec.rb
159
+ - spec/spec_helper.rb
160
+ - spec/support/shared_examples_for_set.rb