diff_set 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: deaf2800e3064f34e2e8b253ddec8cae53d29951
4
+ data.tar.gz: d61484cba1501f4f453996829e2347698b28b77a
5
+ SHA512:
6
+ metadata.gz: 74d052ee5da4650f47ab40a1d491cc14678597b76a4110368daf09b85680de9cf075077a7d16c86e46fe359d0c14797b8f2ff2e8313c6d8b0c2429384992f360
7
+ data.tar.gz: 80e99fa750b79cae19630c94bb337425b62a2bd2b3f2452a0b7359ce2db5bb0c96574b23141178e079c2eb3b3995815b4012aa23ad06b0ffdde96a71828fa6f2
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .DS_Store
19
+ .rvmrc
20
+ .ruby-version
21
+ *.o
22
+ Makefile
23
+ *.bundle
24
+ *.so
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the public RubyGems index.
source('https://rubygems.org')

# Take the dependency list from the gemspec in this directory.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Michael Parrish
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # DiffSet
2
+
3
+ DiffSet contains a collection of data structures optimized to perform partial set subtractions.
4
+
5
+ - `DiffSet::RandomSet` Produces a randomized set difference
6
+
7
+ - `DiffSet::PrioritySet` Produces an ordered set difference
8
+
9
+ - `DiffSet::PairwiseRandomSet` Presents a random set difference as a list of pairs
10
+
11
+ - `DiffSet::PairwisePrioritySet` Presents an ordered set difference as a list of pairs
12
+
13
+ ## Installation
14
+
15
+ 1. Install [Boost](http://www.boost.org/):
16
+
17
+ - OS X: `brew update && brew install boost`
18
+
19
+ - Ubuntu: `sudo apt-get update && sudo apt-get install libboost-all-dev`
20
+
21
+ 2. Add this line to your application's Gemfile: `gem 'diff_set'`
22
+
23
+ 3. And then execute: `bundle`
24
+
25
+ To install rice, **Ruby must be compiled with shared libraries enabled**:
26
+
27
+ - rvm: `rvm reinstall [version] -- --enable-shared`
28
+
29
+ - rbenv: `CONFIGURE_OPTS="--enable-shared" rbenv install [version]`
30
+
31
+
32
+ ## Usage
33
+
34
+ The API is pretty straightforward, and [the specs](https://github.com/parrish/diff_set/tree/master/spec) have examples.
35
+
36
+ ## Testing
37
+
38
+ Run the specs with `rake`
39
+
40
+ ## Contributing
41
+
42
+ 1. Fork it ( http://github.com/parrish/diff_set/fork )
43
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
44
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
45
+ 4. Push to the branch (`git push origin my-new-feature`)
46
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,20 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/extensiontask'
3
+ require 'rspec/core/rake_task'
4
+ require 'rubygems/package_task'
5
+
6
# Gem specification shared by the packaging and compilation tasks below.
GEMSPEC = Gem::Specification.load('diff_set.gemspec')

# Packaging tasks; tar and zip archives are requested as well.
Gem::PackageTask.new(GEMSPEC) do |package|
  package.need_tar = true
  package.need_zip = true
end

# Compilation task for the native extension: sources are read from
# ext/diff_set and the built library is placed in lib/diff_set.
Rake::ExtensionTask.new('diff_set_ext', GEMSPEC) do |extension|
  extension.source_pattern = '*.{h,cpp}'
  extension.lib_dir = 'lib/diff_set'
  extension.ext_dir = 'ext/diff_set'
end

RSpec::Core::RakeTask.new(:spec)

# A bare `rake` compiles the extension and then runs the specs.
task(default: [:compile, :spec])
data/diff_set.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
# Put lib/ on the load path so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'diff_set/version'

Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = 'diff_set'
  gem.version     = DiffSet::VERSION
  gem.authors     = ['Michael Parrish']
  gem.email       = ['michael@zooniverse.org']
  gem.summary     = 'DiffSet contains a collection of data structures optimized to perform partial set subtractions'
  gem.description = ''
  gem.homepage    = 'https://github.com/parrish/diff_set'
  gem.license     = 'MIT'

  # Package contents: every file tracked by git (NUL-separated listing).
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']
  gem.extensions    = ['ext/diff_set/extconf.rb']

  # Dependencies, declared in the same order as before.
  gem.add_development_dependency('bundler', '~> 1.5')
  gem.add_development_dependency('rake')
  gem.add_development_dependency('rake-compiler', '~> 0.9.2')
  gem.add_development_dependency('rspec')
  gem.add_development_dependency('pry')
  gem.add_runtime_dependency('rice', '~> 1.6')
end
@@ -0,0 +1,39 @@
1
+ #include "rice/Class.hpp"
2
+ #include "rice/Module.hpp"
3
+ #include "rice/ruby_try_catch.hpp"
4
+ #include "rice/Data_Type.hpp"
5
+ #include "rice/Constructor.hpp"
6
+ using namespace Rice;
7
+
8
+ #include "random_set.h"
9
+ #include "priority_set.h"
10
+
11
+ extern "C"
12
+ void Init_diff_set_ext() {
13
+ RUBY_TRY
14
+ {
15
+ Module rb_mDiffSet = define_module("DiffSet");
16
+
17
+ Data_Type<RandomSet> rb_cRandomSet = define_class_under<RandomSet>(rb_mDiffSet, "RandomSet")
18
+ .define_constructor(Constructor<RandomSet>())
19
+ .define_method("add", &RandomSet::add, (Arg("id"), Arg("priority") = 0.0))
20
+ .define_method("remove", &RandomSet::remove)
21
+ .define_method("sample", &RandomSet::sample)
22
+ .define_method("subtract", &RandomSet::subtract)
23
+ .define_method("include?", &RandomSet::includes)
24
+ .define_method("to_a", &RandomSet::to_a)
25
+ .define_method("size", &RandomSet::size);
26
+
27
+ Data_Type<PrioritySet> rb_cPrioritySet = define_class_under<PrioritySet>(rb_mDiffSet, "PrioritySet")
28
+ .define_constructor(Constructor<PrioritySet>())
29
+ .define_method("add", &PrioritySet::add, (Arg("id"), Arg("priority") = 0.0))
30
+ .define_method("remove", &PrioritySet::remove)
31
+ .define_method("sample", &PrioritySet::sample)
32
+ .define_method("subtract", &PrioritySet::subtract)
33
+ .define_method("include?", &PrioritySet::includes)
34
+ .define_method("to_a", &PrioritySet::to_a)
35
+ .define_method("to_h", &PrioritySet::to_h)
36
+ .define_method("size", &PrioritySet::size);
37
+ }
38
+ RUBY_CATCH
39
+ }
@@ -0,0 +1,4 @@
1
# Generates the Makefile for the Rice-based C++ extension.
require 'mkmf-rice'

# The target path places the compiled library under diff_set/, matching
# `require 'diff_set/diff_set_ext'` in lib/diff_set.rb.
# (The former `extension_name` local was never read, so it is gone.)
create_makefile 'diff_set/diff_set_ext'
@@ -0,0 +1,99 @@
1
+ #include "priority_set.h"
2
+
3
+ PrioritySet::PrioritySet() {
4
+ timeval time;
5
+ gettimeofday(&time, NULL);
6
+ long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
7
+ this->rng.seed((uint)millis);
8
+ }
9
+
10
+ void PrioritySet::add(int id, double priority) {
11
+ if(includes(id)) {
12
+ element_handle handle = this->element_handles[id];
13
+ (*handle).priority = priority;
14
+ this->heap.update(handle);
15
+ return;
16
+ }
17
+
18
+ static boost::uniform_01<boost::random::mt19937> dist(this->rng);
19
+ element_handle handle = this->heap.push(element(id, priority, dist()));
20
+ this->element_handles.insert(std::make_pair<int, element_handle>(id, handle));
21
+ this->element_set.insert(id);
22
+ }
23
+
24
+ void PrioritySet::remove(int id) {
25
+ boost::unordered_set<int>::iterator set_it = this->element_set.find(id);
26
+
27
+ if(set_it != this->element_set.end()) {
28
+ this->element_set.erase(set_it);
29
+ element_handle handle = this->element_handles[id];
30
+ (*handle).priority = std::numeric_limits<int>::min();
31
+ (*handle).enabled = false;
32
+ this->heap.decrease(handle);
33
+ }
34
+ }
35
+
36
+ Array PrioritySet::sample(int limit) {
37
+ Array sampled;
38
+ fibonacci_heap::ordered_iterator it;
39
+
40
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
41
+ if(it->enabled) {
42
+ sampled.push(it->id);
43
+ if(sampled.size() >= (size_t)limit) {
44
+ break;
45
+ }
46
+ }
47
+ }
48
+
49
+ return sampled;
50
+ }
51
+
52
+ bool PrioritySet::includes(int id) {
53
+ boost::unordered_set<int>::const_iterator it;
54
+ it = this->element_set.find(id);
55
+ return it != this->element_set.end();
56
+ }
57
+
58
+ Array PrioritySet::subtract(RandomSet &other, size_t limit) {
59
+ Array diff;
60
+ fibonacci_heap::ordered_iterator it;
61
+ boost::unordered_set<int>::const_iterator in_other;
62
+
63
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
64
+ if(it->enabled && other.element_set.find(it->id) == other.element_set.end()) {
65
+ diff.push(it->id);
66
+ if(diff.size() >= limit) {
67
+ break;
68
+ }
69
+ }
70
+ }
71
+
72
+ return diff;
73
+ }
74
+
75
+ Array PrioritySet::to_a() {
76
+ Array array;
77
+ fibonacci_heap::ordered_iterator it;
78
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
79
+ if(it->enabled) {
80
+ array.push(it->id);
81
+ }
82
+ }
83
+ return array;
84
+ }
85
+
86
+ Hash PrioritySet::to_h() {
87
+ Hash hash;
88
+ fibonacci_heap::ordered_iterator it;
89
+ for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
90
+ if(it->enabled) {
91
+ hash[it->id] = it->priority;
92
+ }
93
+ }
94
+ return hash;
95
+ }
96
+
97
+ size_t PrioritySet::size() {
98
+ return this->element_set.size();
99
+ }
@@ -0,0 +1,59 @@
1
+ #ifndef PRIORITY_SET_H
2
+ #define PRIORITY_SET_H
3
+
4
+ #include "rice/Object.hpp"
5
+ #include "rice/Array.hpp"
6
+ #include "rice/Hash.hpp"
7
+ using namespace Rice;
8
+
9
+ #include <boost/random.hpp>
10
+ #include <boost/heap/fibonacci_heap.hpp>
11
+ #include <boost/unordered_set.hpp>
12
+ #include <boost/unordered_map.hpp>
13
+
14
+ #include <sys/time.h>
15
+ #include <limits>
16
+
17
+ #include "random_set.h"
18
+
19
+ class PrioritySet {
20
+ public:
21
+ struct element {
22
+ int id;
23
+ double priority;
24
+ double random;
25
+ bool enabled;
26
+
27
+ element(int id, double priority, double random) {
28
+ enabled = true;
29
+ this->id = id;
30
+ this->priority = priority;
31
+ this->random = random;
32
+ }
33
+ };
34
+
35
+ struct comparator {
36
+ bool operator()(const element &a, const element &b) const {
37
+ return (a.priority < b.priority) || (a.priority == b.priority && a.random < b.random);
38
+ }
39
+ };
40
+
41
+ PrioritySet();
42
+ void add(int id, double priority = 0.0);
43
+ void remove(int id);
44
+ Array sample(int limit);
45
+ bool includes(int id);
46
+ Array subtract(RandomSet &other, size_t limit);
47
+ Array to_a();
48
+ Hash to_h();
49
+ size_t size();
50
+ protected:
51
+ typedef boost::heap::fibonacci_heap<element, boost::heap::compare<comparator> > fibonacci_heap;
52
+ typedef fibonacci_heap::handle_type element_handle;
53
+ fibonacci_heap heap;
54
+ boost::unordered_map<int, element_handle > element_handles;
55
+ boost::unordered_set<int> element_set;
56
+ boost::random::mt19937 rng;
57
+ };
58
+
59
+ #endif
@@ -0,0 +1,108 @@
1
+ #include "random_set.h"
2
+
3
+ RandomSet::RandomSet() {
4
+ timeval time;
5
+ gettimeofday(&time, NULL);
6
+ long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
7
+ this->rng.seed((uint)millis);
8
+ }
9
+
10
+ void RandomSet::add(int element, double priority) {
11
+ this->element_set.insert(element);
12
+ this->elements.push_back(element);
13
+ }
14
+
15
+ void RandomSet::remove(int element) {
16
+ boost::unordered_set<int>::iterator set_it = this->element_set.find(element);
17
+
18
+ if(set_it != this->element_set.end()) {
19
+ this->element_set.erase(set_it);
20
+
21
+ std::vector<int>::iterator it = iterator_to(element);
22
+ if(it != this->elements.end()) {
23
+ this->elements.erase(it);
24
+ }
25
+ }
26
+ }
27
+
28
+ Array RandomSet::sample(int limit) {
29
+ Array sampled;
30
+ int swapIndex;
31
+ int tmp;
32
+ int upper_bound = (int)this->elements.size() - 1;
33
+ boost::random::uniform_int_distribution<> dist;
34
+
35
+ for(int i = 0; i < limit && i < (int)this->elements.size(); i++) {
36
+ dist = boost::random::uniform_int_distribution<>(std::min(i + 1, upper_bound), upper_bound);
37
+ swapIndex = dist(rng);
38
+ tmp = this->elements[i];
39
+ this->elements[i] = this->elements[swapIndex];
40
+ this->elements[swapIndex] = tmp;
41
+ sampled.push(this->elements[i]);
42
+ }
43
+
44
+ return sampled;
45
+ }
46
+
47
+ bool RandomSet::includes(int element) {
48
+ boost::unordered_set<int>::const_iterator it;
49
+ it = this->element_set.find(element);
50
+ return it != this->element_set.end();
51
+ }
52
+
53
+ std::vector<int>::iterator RandomSet::iterator_to(int element) {
54
+ std::vector<int>::iterator it;
55
+ for(it = this->elements.begin(); it != this->elements.end(); it++) {
56
+ if(element == *it) {
57
+ return it;
58
+ }
59
+ }
60
+
61
+ return this->elements.end();
62
+ }
63
+
64
+ Array RandomSet::subtract(RandomSet &other, size_t limit) {
65
+ Array diff;
66
+ int element;
67
+ int swapIndex;
68
+ int tmp;
69
+ int upper_bound = (int)this->elements.size() - 1;
70
+
71
+ boost::unordered_set<int>::const_iterator in_other;
72
+ boost::random::uniform_int_distribution<> dist;
73
+
74
+ for(int i = 0; i < (int)this->elements.size(); i++) {
75
+ dist = boost::random::uniform_int_distribution<>(std::min(i + 1, upper_bound), upper_bound);
76
+ swapIndex = dist(rng);
77
+ tmp = this->elements[i];
78
+ this->elements[i] = this->elements[swapIndex];
79
+ this->elements[swapIndex] = tmp;
80
+
81
+ element = this->elements[i];
82
+ in_other = other.element_set.find(element);
83
+
84
+ if(in_other == other.element_set.end()) {
85
+ diff.push(element);
86
+ if(diff.size() >= limit) {
87
+ break;
88
+ }
89
+ }
90
+ }
91
+
92
+ return diff;
93
+ }
94
+
95
+ Array RandomSet::to_a() {
96
+ Array array;
97
+
98
+ std::vector<int>::iterator it;
99
+ for(it = this->elements.begin(); it != this->elements.end(); it++) {
100
+ array.push(*it);
101
+ }
102
+
103
+ return array;
104
+ }
105
+
106
+ size_t RandomSet::size() {
107
+ return this->elements.size();
108
+ }
@@ -0,0 +1,33 @@
1
+ #ifndef RANDOM_SET_H
2
+ #define RANDOM_SET_H
3
+
4
+ #include "rice/Object.hpp"
5
+ #include "rice/Array.hpp"
6
+ using namespace Rice;
7
+
8
+ #include <boost/random/mersenne_twister.hpp>
9
+ #include <boost/random/uniform_int_distribution.hpp>
10
+ #include <boost/unordered_set.hpp>
11
+
12
+ #include <vector>
13
+ #include <sys/time.h>
14
+
15
+ class RandomSet {
16
+ public:
17
+ RandomSet();
18
+ void add(int element, double priority = 0.0);
19
+ void remove(int element);
20
+ Array sample(int limit);
21
+ bool includes(int element);
22
+ Array subtract(RandomSet &other, size_t limit);
23
+ Array to_a();
24
+ size_t size();
25
+ boost::unordered_set<int> element_set;
26
+ protected:
27
+ std::vector<int> elements;
28
+ boost::random::mt19937 rng;
29
+
30
+ std::vector<int>::iterator iterator_to(int element);
31
+ };
32
+
33
+ #endif
@@ -0,0 +1,23 @@
1
module DiffSet
  # Mixin that makes a set's +sample+ and +subtract+ return two-element
  # pairs instead of a flat list.
  #
  # On inclusion the host class's current implementations are kept as
  # +_c_sample+ and +_c_subtract+, and wrappers are installed that ask for
  # twice as many elements and group the result in twos. A trailing
  # element without a partner is dropped.
  module Pairwise
    def self.included(klass)
      klass.class_eval do
        alias_method :_c_sample, :sample
        alias_method :_c_subtract, :subtract

        def sample(limit)
          _in_pairs(_c_sample(2 * limit))
        end

        def subtract(set, limit)
          _in_pairs(_c_subtract(set, 2 * limit))
        end
      end
    end

    protected

    # Groups +list+ into consecutive pairs, discarding an incomplete one.
    def _in_pairs(list)
      list.each_slice(2).select { |pair| pair.length == 2 }
    end
  end
end
@@ -0,0 +1,5 @@
1
module DiffSet
  # PrioritySet with the Pairwise mixin applied, so +sample+ and
  # +subtract+ return pairs.
  class PairwisePrioritySet < PrioritySet
    include Pairwise
  end
end
@@ -0,0 +1,5 @@
1
module DiffSet
  # RandomSet with the Pairwise mixin applied, so +sample+ and
  # +subtract+ return pairs.
  class PairwiseRandomSet < RandomSet
    include Pairwise
  end
end
@@ -0,0 +1,3 @@
1
module DiffSet
  # Release number of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = '0.0.2'.freeze
end
data/lib/diff_set.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'diff_set/version'
2
+ require 'diff_set/diff_set_ext'
3
+
4
+ require 'diff_set/pairwise'
5
+ require 'diff_set/pairwise_random_set'
6
+ require 'diff_set/pairwise_priority_set'
7
+
8
# Top-level namespace of the gem. Nothing is defined here directly; the
# contents come from the files required by this file.
module DiffSet
end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
module DiffSet
  # Behaviour of the priority-ordered set whose results come back as pairs.
  describe PairwisePrioritySet do
    it_behaves_like 'a set'

    # Ids 1..5 with priorities 5..1, so id 1 ranks first.
    let(:set) do
      priority_set = PairwisePrioritySet.new
      (1..5).each { |id| priority_set.add id, 6 - id }
      priority_set
    end

    # Plain random set holding ids 1..3.
    let(:other_set) do
      random_set = RandomSet.new
      (1..3).each { |id| random_set.add id }
      random_set
    end

    it 'should sample elements in order' do
      set.sample(2).should == [[1, 2], [3, 4]]
      set.sample(3).length.should == 2
    end

    it 'should not include removed elements in subtractions' do
      set.add 6, 0
      set.subtract(other_set, 5).flatten.should == [4, 5]
      set.remove 5
      set.subtract(other_set, 5).flatten.should == [4, 6]
    end

    it 'should subtract another set' do
      set.subtract(other_set, 5).length.should == 1
      set.subtract(other_set, 5).first.should == [4, 5]
    end

    it 'should always return pairs' do
      (6..8).each { |id| set.add id, rand }
      set.subtract(other_set, 3).each do |pair|
        pair.length.should == 2
      end
    end

    it 'should update the priority' do
      set.to_a.first.should == 1
      set.to_h[1].should be_within(0.1).of(5)
      set.add 1, 0
      set.to_a.first.should == 2
      set.to_a.last.should == 1
      set.to_h[1].should be_within(0.1).of(0)
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
module DiffSet
  # Behaviour of the random set whose results come back as pairs.
  describe PairwiseRandomSet do
    it_behaves_like 'a set'

    # Builds a pairwise random set holding the ids 1..elements.
    def create_set(elements)
      random_set = PairwiseRandomSet.new
      (1..elements).each { |id| random_set.add id }
      random_set
    end

    let(:set) { create_set(5) }
    let(:other_set) { create_set(3) }

    it 'should sample pairs of elements' do
      set.sample(2).map(&:length).should == [2, 2]
      set.sample(3).length.should == 2
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      set.subtract(other_set, 5).flatten.should_not include 5
    end

    it 'should subtract another set' do
      set.subtract(other_set, 5).length.should == 1
      set.subtract(other_set, 5).first.should =~ [4, 5]
    end

    it 'should always return pairs' do
      (6..8).each { |id| set.add id }
      set.subtract(other_set, 3).each do |pair|
        pair.length.should == 2
      end
    end

    it 'should mutate the order of the elements when sampling' do
      order_before = set.to_a
      set.sample 5
      order_before.should =~ set.to_a
      order_before.should_not == set.to_a
    end

    it 'should mutate the order of the elements on a subtraction' do
      order_before = set.to_a
      set.subtract other_set, 5
      order_before.should =~ set.to_a
      order_before.should_not == set.to_a
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spec_helper'
2
+
3
module DiffSet
  # Behaviour of the priority-ordered set.
  describe PrioritySet do
    it_behaves_like 'a set'

    # Ids 1..5 with priorities 5..1, so id 1 ranks first.
    let(:set) do
      priority_set = PrioritySet.new
      (1..5).each { |id| priority_set.add id, 6 - id }
      priority_set
    end

    # Plain random set holding ids 1..3.
    let(:other_set) do
      random_set = RandomSet.new
      (1..3).each { |id| random_set.add id }
      random_set
    end

    it 'should sample elements in order' do
      set.sample(5).should == (1..5).to_a
    end

    it 'should subtract another set' do
      set.subtract(other_set, 5).should == [4, 5]
      set.subtract(other_set, 1).first.should == 4
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      set.subtract(other_set, 5).should == [4]
    end

    it 'should update the priority' do
      set.to_a.first.should == 1
      set.to_h[1].should be_within(0.1).of(5)
      set.add 1, 0
      set.to_a.first.should == 2
      set.to_a.last.should == 1
      set.to_h[1].should be_within(0.1).of(0)
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
module DiffSet
  # Behaviour of the randomly-ordered set.
  describe RandomSet do
    it_behaves_like 'a set'

    # Builds a random set holding the ids 1..elements.
    def create_set(elements)
      random_set = RandomSet.new
      (1..elements).each { |id| random_set.add id }
      random_set
    end

    let(:set) { create_set(5) }
    let(:other_set) { create_set(3) }

    it 'should subtract another set' do
      set.subtract(other_set, 5).should =~ [4, 5]
      [4, 5].should include set.subtract(other_set, 1).first
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      set.subtract(other_set, 5).should == [4]
    end

    it 'should mutate the order of the elements when sampling' do
      order_before = set.to_a
      set.sample 5
      order_before.should =~ set.to_a
      order_before.should_not == set.to_a
    end

    it 'should mutate the order of the elements on a subtraction' do
      order_before = set.to_a
      set.subtract other_set, 5
      order_before.should =~ set.to_a
      order_before.should_not == set.to_a
    end
  end
end
@@ -0,0 +1,17 @@
1
# Put the project's lib/ and ext/ directories at the front of the load
# path (lib first, then ext, so ext ends up ahead of lib).
project_root = File.expand_path(File.join(File.dirname(__FILE__), '../'))
%w(lib ext).map { |name| File.join(project_root, name) }.each do |path|
  $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
end

require 'pry'
require 'diff_set'

# Load every support file (shared examples and the like) in sorted order.
Dir["./spec/support/**/*.rb"].sort.each do |support_file|
  require support_file
end

RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.run_all_when_everything_filtered = true
  rspec.filter_run :focus
  rspec.order = 'random'
end
@@ -0,0 +1,34 @@
1
# Examples shared by every set class. The including spec must define
# `set` as a set holding the ids 1 through 5.
shared_examples_for 'a set' do
  it 'should convert to an Array' do
    set.to_a.should =~ [1, 2, 3, 4, 5]
  end

  it 'should add elements' do
    set.add(100)
    set.to_a.should include(100)
  end

  it 'should remove elements' do
    set.remove(1)
    set.to_a.should_not include(1)
  end

  it 'should sample elements' do
    set.sample(2).length.should == 2
  end

  it 'should not include removed elements in samples' do
    set.remove(5)
    set.sample(5).should_not include(5)
  end

  it 'should know how many elements it contains' do
    expect { set.add(100) }.to change { set.size }.from(5).to(6)
  end

  it 'should know if it contains an element' do
    set.should_not include(100)
    set.add(100)
    set.should include(100)
  end
end
metadata ADDED
@@ -0,0 +1,160 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diff_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Michael Parrish
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.9.2
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.9.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rice
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.6'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.6'
97
+ description: ''
98
+ email:
99
+ - michael@zooniverse.org
100
+ executables: []
101
+ extensions:
102
+ - ext/diff_set/extconf.rb
103
+ extra_rdoc_files: []
104
+ files:
105
+ - ".gitignore"
106
+ - ".rspec"
107
+ - Gemfile
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - diff_set.gemspec
112
+ - ext/diff_set/diff_set_ext.cpp
113
+ - ext/diff_set/extconf.rb
114
+ - ext/diff_set/priority_set.cpp
115
+ - ext/diff_set/priority_set.h
116
+ - ext/diff_set/random_set.cpp
117
+ - ext/diff_set/random_set.h
118
+ - lib/diff_set.rb
119
+ - lib/diff_set/pairwise.rb
120
+ - lib/diff_set/pairwise_priority_set.rb
121
+ - lib/diff_set/pairwise_random_set.rb
122
+ - lib/diff_set/version.rb
123
+ - spec/pairwise_priority_set_spec.rb
124
+ - spec/pairwise_random_set_spec.rb
125
+ - spec/priority_set_spec.rb
126
+ - spec/random_set_spec.rb
127
+ - spec/spec_helper.rb
128
+ - spec/support/shared_examples_for_set.rb
129
+ homepage: https://github.com/parrish/diff_set
130
+ licenses:
131
+ - MIT
132
+ metadata: {}
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ required_rubygems_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubyforge_project:
149
+ rubygems_version: 2.2.2
150
+ signing_key:
151
+ specification_version: 4
152
+ summary: DiffSet contains a collection of data structures optimized to perform partial
153
+ set subtractions
154
+ test_files:
155
+ - spec/pairwise_priority_set_spec.rb
156
+ - spec/pairwise_random_set_spec.rb
157
+ - spec/priority_set_spec.rb
158
+ - spec/random_set_spec.rb
159
+ - spec/spec_helper.rb
160
+ - spec/support/shared_examples_for_set.rb