diff_set 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/.rspec +2 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +20 -0
- data/diff_set.gemspec +28 -0
- data/ext/diff_set/diff_set_ext.cpp +39 -0
- data/ext/diff_set/extconf.rb +4 -0
- data/ext/diff_set/priority_set.cpp +99 -0
- data/ext/diff_set/priority_set.h +59 -0
- data/ext/diff_set/random_set.cpp +108 -0
- data/ext/diff_set/random_set.h +33 -0
- data/lib/diff_set/pairwise.rb +23 -0
- data/lib/diff_set/pairwise_priority_set.rb +5 -0
- data/lib/diff_set/pairwise_random_set.rb +5 -0
- data/lib/diff_set/version.rb +3 -0
- data/lib/diff_set.rb +10 -0
- data/spec/pairwise_priority_set_spec.rb +53 -0
- data/spec/pairwise_random_set_spec.rb +50 -0
- data/spec/priority_set_spec.rb +45 -0
- data/spec/random_set_spec.rb +40 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/shared_examples_for_set.rb +34 -0
- metadata +160 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: deaf2800e3064f34e2e8b253ddec8cae53d29951
|
4
|
+
data.tar.gz: d61484cba1501f4f453996829e2347698b28b77a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 74d052ee5da4650f47ab40a1d491cc14678597b76a4110368daf09b85680de9cf075077a7d16c86e46fe359d0c14797b8f2ff2e8313c6d8b0c2429384992f360
|
7
|
+
data.tar.gz: 80e99fa750b79cae19630c94bb337425b62a2bd2b3f2452a0b7359ce2db5bb0c96574b23141178e079c2eb3b3995815b4012aa23ad06b0ffdde96a71828fa6f2
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
.DS_Store
|
19
|
+
.rvmrc
|
20
|
+
.ruby-version
|
21
|
+
*.o
|
22
|
+
Makefile
|
23
|
+
*.bundle
|
24
|
+
*.so
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Michael Parrish
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# DiffSet
|
2
|
+
|
3
|
+
DiffSet contains a collection of data structures optimized to perform partial set subtractions.
|
4
|
+
|
5
|
+
- `DiffSet::RandomSet` Produces a randomized set difference
|
6
|
+
|
7
|
+
- `DiffSet::PrioritySet` Produces an ordered set difference
|
8
|
+
|
9
|
+
- `DiffSet::PairwiseRandomSet` Presents a random set difference as a list of pairs
|
10
|
+
|
11
|
+
- `DiffSet::PairwisePrioritySet` Presents an ordered set difference as a list of pairs
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
1. Install [Boost](http://www.boost.org/):
|
16
|
+
|
17
|
+
- OS X: `brew update && brew install boost`
|
18
|
+
|
19
|
+
- Ubuntu: `sudo apt-get update && sudo apt-get install libboost-all-dev`
|
20
|
+
|
21
|
+
2. Add this line to your application's Gemfile: `gem 'diff_set'`
|
22
|
+
|
23
|
+
3. And then execute: `bundle`
|
24
|
+
|
25
|
+
To install the `rice` dependency, **Ruby must be compiled with shared libraries enabled**:
|
26
|
+
|
27
|
+
- rvm: `rvm reinstall [version] -- --enable-shared`
|
28
|
+
|
29
|
+
- rbenv: `CONFIGURE_OPTS="--enable-shared" rbenv install [version]`
|
30
|
+
|
31
|
+
|
32
|
+
## Usage
|
33
|
+
|
34
|
+
The API is pretty straightforward, and [the specs](https://github.com/parrish/diff_set/tree/master/spec) have examples.
|
35
|
+
|
36
|
+
## Testing
|
37
|
+
|
38
|
+
Run the specs with `rake`
|
39
|
+
|
40
|
+
## Contributing
|
41
|
+
|
42
|
+
1. Fork it ( http://github.com/parrish/diff_set/fork )
|
43
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
44
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
45
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
46
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
|
6
|
+
# Load the gemspec once; the packaging and extension tasks below both use it.
GEMSPEC = Gem::Specification.load('diff_set.gemspec')

# Packaging task, additionally requesting zip and tar archives.
Gem::PackageTask.new(GEMSPEC) do |package|
  package.need_zip = true
  package.need_tar = true
end

# Native extension task: sources live in ext/diff_set, output goes to lib/diff_set.
Rake::ExtensionTask.new('diff_set_ext', GEMSPEC) do |extension|
  extension.ext_dir = 'ext/diff_set'
  extension.lib_dir = 'lib/diff_set'
  extension.source_pattern = '*.{h,cpp}'
end

RSpec::Core::RakeTask.new(:spec)

# A bare `rake` compiles the extension first and then runs the specs.
task default: [:compile, :spec]
|
data/diff_set.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'diff_set/version'
|
5
|
+
|
6
|
+
# Gem specification: identity, packaged files, native extension and dependencies.
Gem::Specification.new do |spec|
  spec.name          = 'diff_set'
  spec.version       = DiffSet::VERSION
  spec.authors       = ['Michael Parrish']
  spec.email         = ['michael@zooniverse.org']
  spec.summary       = 'DiffSet contains a collection of data structures optimized to perform partial set subtractions'
  # An empty description triggers a RubyGems build warning, so describe the
  # four set classes the gem provides.
  spec.description   = 'Native set data structures (RandomSet, PrioritySet and their pairwise variants) ' \
                       'that produce randomized or priority-ordered partial set differences.'
  spec.homepage      = 'https://github.com/parrish/diff_set'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git (NUL-separated to survive odd names).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']
  # The C++ extension is built at install time through this extconf.
  spec.extensions    = ['ext/diff_set/extconf.rb']

  spec.add_development_dependency 'bundler', '~> 1.5'
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rake-compiler', '~> 0.9.2'
  spec.add_development_dependency 'rspec'
  spec.add_development_dependency 'pry'
  spec.add_runtime_dependency 'rice', '~> 1.6'
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#include "rice/Class.hpp"
|
2
|
+
#include "rice/Module.hpp"
|
3
|
+
#include "rice/ruby_try_catch.hpp"
|
4
|
+
#include "rice/Data_Type.hpp"
|
5
|
+
#include "rice/Constructor.hpp"
|
6
|
+
using namespace Rice;
|
7
|
+
|
8
|
+
#include "random_set.h"
|
9
|
+
#include "priority_set.h"
|
10
|
+
|
11
|
+
extern "C"
|
12
|
+
void Init_diff_set_ext() {
|
13
|
+
RUBY_TRY
|
14
|
+
{
|
15
|
+
Module rb_mDiffSet = define_module("DiffSet");
|
16
|
+
|
17
|
+
Data_Type<RandomSet> rb_cRandomSet = define_class_under<RandomSet>(rb_mDiffSet, "RandomSet")
|
18
|
+
.define_constructor(Constructor<RandomSet>())
|
19
|
+
.define_method("add", &RandomSet::add, (Arg("id"), Arg("priority") = 0.0))
|
20
|
+
.define_method("remove", &RandomSet::remove)
|
21
|
+
.define_method("sample", &RandomSet::sample)
|
22
|
+
.define_method("subtract", &RandomSet::subtract)
|
23
|
+
.define_method("include?", &RandomSet::includes)
|
24
|
+
.define_method("to_a", &RandomSet::to_a)
|
25
|
+
.define_method("size", &RandomSet::size);
|
26
|
+
|
27
|
+
Data_Type<PrioritySet> rb_cPrioritySet = define_class_under<PrioritySet>(rb_mDiffSet, "PrioritySet")
|
28
|
+
.define_constructor(Constructor<PrioritySet>())
|
29
|
+
.define_method("add", &PrioritySet::add, (Arg("id"), Arg("priority") = 0.0))
|
30
|
+
.define_method("remove", &PrioritySet::remove)
|
31
|
+
.define_method("sample", &PrioritySet::sample)
|
32
|
+
.define_method("subtract", &PrioritySet::subtract)
|
33
|
+
.define_method("include?", &PrioritySet::includes)
|
34
|
+
.define_method("to_a", &PrioritySet::to_a)
|
35
|
+
.define_method("to_h", &PrioritySet::to_h)
|
36
|
+
.define_method("size", &PrioritySet::size);
|
37
|
+
}
|
38
|
+
RUBY_CATCH
|
39
|
+
}
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#include "priority_set.h"
|
2
|
+
|
3
|
+
// Constructs an empty set and seeds its random number generator from the
// current wall-clock time.
PrioritySet::PrioritySet() {
  timeval now;
  gettimeofday(&now, NULL);

  // Combine seconds and microseconds with unsigned 64-bit integer arithmetic.
  // The earlier double -> long -> `uint` chain overflowed where `long` is
  // 32 bits, depended on the non-standard `uint` typedef, and handed the same
  // seed to every set created within one millisecond.
  unsigned long long micros =
    static_cast<unsigned long long>(now.tv_sec) * 1000000ULL +
    static_cast<unsigned long long>(now.tv_usec);

  // Only the low 32 bits are kept for the seed.
  this->rng.seed(static_cast<unsigned int>(micros));
}
|
9
|
+
|
10
|
+
void PrioritySet::add(int id, double priority) {
|
11
|
+
if(includes(id)) {
|
12
|
+
element_handle handle = this->element_handles[id];
|
13
|
+
(*handle).priority = priority;
|
14
|
+
this->heap.update(handle);
|
15
|
+
return;
|
16
|
+
}
|
17
|
+
|
18
|
+
static boost::uniform_01<boost::random::mt19937> dist(this->rng);
|
19
|
+
element_handle handle = this->heap.push(element(id, priority, dist()));
|
20
|
+
this->element_handles.insert(std::make_pair<int, element_handle>(id, handle));
|
21
|
+
this->element_set.insert(id);
|
22
|
+
}
|
23
|
+
|
24
|
+
void PrioritySet::remove(int id) {
|
25
|
+
boost::unordered_set<int>::iterator set_it = this->element_set.find(id);
|
26
|
+
|
27
|
+
if(set_it != this->element_set.end()) {
|
28
|
+
this->element_set.erase(set_it);
|
29
|
+
element_handle handle = this->element_handles[id];
|
30
|
+
(*handle).priority = std::numeric_limits<int>::min();
|
31
|
+
(*handle).enabled = false;
|
32
|
+
this->heap.decrease(handle);
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
// Returns up to `limit` enabled element ids in heap order.
//
// limit - maximum number of ids to return. Zero or a negative value yields an
//         empty Array; previously 0 still returned one element and a negative
//         value, once cast to size_t, returned every element.
Array PrioritySet::sample(int limit) {
  Array sampled;
  if(limit <= 0) {
    return sampled;
  }

  const size_t wanted = (size_t)limit;
  fibonacci_heap::ordered_iterator it;

  for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); ++it) {
    if(!it->enabled) {
      continue;  // skip elements flagged as removed
    }

    sampled.push(it->id);
    if(sampled.size() >= wanted) {
      break;
    }
  }

  return sampled;
}
|
51
|
+
|
52
|
+
// True when `id` is currently a member of the set.
bool PrioritySet::includes(int id) {
  return this->element_set.count(id) != 0;
}
|
57
|
+
|
58
|
+
// Returns up to `limit` enabled ids, in heap order, that are not members of
// `other`.
//
// other - set whose members are excluded from the result.
// limit - maximum number of ids to return. Zero yields an empty Array;
//         previously one element was pushed before the limit was checked.
Array PrioritySet::subtract(RandomSet &other, size_t limit) {
  Array diff;
  if(limit == 0) {
    return diff;
  }

  fibonacci_heap::ordered_iterator it;

  for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); ++it) {
    if(!it->enabled) {
      continue;  // flagged as removed
    }
    if(other.element_set.find(it->id) != other.element_set.end()) {
      continue;  // present in the other set
    }

    diff.push(it->id);
    if(diff.size() >= limit) {
      break;
    }
  }

  return diff;
}
|
74
|
+
|
75
|
+
// Returns every enabled element id as a Ruby Array, in heap order.
Array PrioritySet::to_a() {
  Array ids;

  for(fibonacci_heap::ordered_iterator node = this->heap.ordered_begin();
      node != this->heap.ordered_end(); ++node) {
    if(!node->enabled) {
      continue;
    }
    ids.push(node->id);
  }

  return ids;
}
|
85
|
+
|
86
|
+
// Returns a Ruby Hash mapping each enabled element id to its priority.
Hash PrioritySet::to_h() {
  Hash priorities;

  for(fibonacci_heap::ordered_iterator node = this->heap.ordered_begin();
      node != this->heap.ordered_end(); ++node) {
    if(!node->enabled) {
      continue;
    }
    priorities[node->id] = node->priority;
  }

  return priorities;
}
|
96
|
+
|
97
|
+
// Number of current members, taken from the membership set.
size_t PrioritySet::size() {
  const size_t members = this->element_set.size();
  return members;
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#ifndef PRIORITY_SET_H
|
2
|
+
#define PRIORITY_SET_H
|
3
|
+
|
4
|
+
#include "rice/Object.hpp"
|
5
|
+
#include "rice/Array.hpp"
|
6
|
+
#include "rice/Hash.hpp"
|
7
|
+
using namespace Rice;
|
8
|
+
|
9
|
+
#include <boost/random.hpp>
|
10
|
+
#include <boost/heap/fibonacci_heap.hpp>
|
11
|
+
#include <boost/unordered_set.hpp>
|
12
|
+
#include <boost/unordered_map.hpp>
|
13
|
+
|
14
|
+
#include <sys/time.h>
|
15
|
+
#include <limits>
|
16
|
+
|
17
|
+
#include "random_set.h"
|
18
|
+
|
19
|
+
class PrioritySet {
|
20
|
+
public:
|
21
|
+
struct element {
|
22
|
+
int id;
|
23
|
+
double priority;
|
24
|
+
double random;
|
25
|
+
bool enabled;
|
26
|
+
|
27
|
+
element(int id, double priority, double random) {
|
28
|
+
enabled = true;
|
29
|
+
this->id = id;
|
30
|
+
this->priority = priority;
|
31
|
+
this->random = random;
|
32
|
+
}
|
33
|
+
};
|
34
|
+
|
35
|
+
struct comparator {
|
36
|
+
bool operator()(const element &a, const element &b) const {
|
37
|
+
return (a.priority < b.priority) || (a.priority == b.priority && a.random < b.random);
|
38
|
+
}
|
39
|
+
};
|
40
|
+
|
41
|
+
PrioritySet();
|
42
|
+
void add(int id, double priority = 0.0);
|
43
|
+
void remove(int id);
|
44
|
+
Array sample(int limit);
|
45
|
+
bool includes(int id);
|
46
|
+
Array subtract(RandomSet &other, size_t limit);
|
47
|
+
Array to_a();
|
48
|
+
Hash to_h();
|
49
|
+
size_t size();
|
50
|
+
protected:
|
51
|
+
typedef boost::heap::fibonacci_heap<element, boost::heap::compare<comparator> > fibonacci_heap;
|
52
|
+
typedef fibonacci_heap::handle_type element_handle;
|
53
|
+
fibonacci_heap heap;
|
54
|
+
boost::unordered_map<int, element_handle > element_handles;
|
55
|
+
boost::unordered_set<int> element_set;
|
56
|
+
boost::random::mt19937 rng;
|
57
|
+
};
|
58
|
+
|
59
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include "random_set.h"
|
2
|
+
|
3
|
+
// Constructs an empty set and seeds its random number generator from the
// current wall-clock time.
RandomSet::RandomSet() {
  timeval now;
  gettimeofday(&now, NULL);

  // Combine seconds and microseconds with unsigned 64-bit integer arithmetic.
  // The earlier double -> long -> `uint` chain overflowed where `long` is
  // 32 bits, depended on the non-standard `uint` typedef, and made every set
  // created within one millisecond shuffle identically.
  unsigned long long micros =
    static_cast<unsigned long long>(now.tv_sec) * 1000000ULL +
    static_cast<unsigned long long>(now.tv_usec);

  // Only the low 32 bits are kept for the seed.
  this->rng.seed(static_cast<unsigned int>(micros));
}
|
9
|
+
|
10
|
+
// Adds `element` to the set; adding an existing member is a no-op.
//
// element  - id to add.
// priority - not used by this function; the parameter mirrors
//            PrioritySet::add so both classes accept the same call.
void RandomSet::add(int element, double priority) {
  // insert() reports whether the id was new. Appending to the vector
  // unconditionally stored duplicates there, which inflated size(), repeated
  // ids in to_a()/sample(), and survived a later remove().
  if(this->element_set.insert(element).second) {
    this->elements.push_back(element);
  }
}
|
14
|
+
|
15
|
+
void RandomSet::remove(int element) {
|
16
|
+
boost::unordered_set<int>::iterator set_it = this->element_set.find(element);
|
17
|
+
|
18
|
+
if(set_it != this->element_set.end()) {
|
19
|
+
this->element_set.erase(set_it);
|
20
|
+
|
21
|
+
std::vector<int>::iterator it = iterator_to(element);
|
22
|
+
if(it != this->elements.end()) {
|
23
|
+
this->elements.erase(it);
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
// Returns up to `limit` randomly chosen ids, shuffling the leading part of the
// internal vector as it goes.
//
// At step i the slot is swapped with a random slot in [i + 1, last] (clamped
// to `last` on the final slot), and the id that lands in slot i is reported.
Array RandomSet::sample(int limit) {
  Array picked;
  const int total = (int)this->elements.size();
  const int last = total - 1;

  for(int i = 0; i < limit && i < total; ++i) {
    boost::random::uniform_int_distribution<> pick(std::min(i + 1, last), last);
    const int j = pick(rng);

    const int displaced = this->elements[i];
    this->elements[i] = this->elements[j];
    this->elements[j] = displaced;

    picked.push(this->elements[i]);
  }

  return picked;
}
|
46
|
+
|
47
|
+
// True when `element` is currently a member of the set.
bool RandomSet::includes(int element) {
  return this->element_set.count(element) != 0;
}
|
52
|
+
|
53
|
+
// Returns an iterator to the first vector slot holding `element`, or
// elements.end() when there is none.
std::vector<int>::iterator RandomSet::iterator_to(int element) {
  std::vector<int>::iterator cursor = this->elements.begin();

  while(cursor != this->elements.end() && *cursor != element) {
    ++cursor;
  }

  return cursor;
}
|
63
|
+
|
64
|
+
// Returns up to `limit` randomly ordered ids that are not members of `other`,
// shuffling the internal vector as it goes.
//
// other - set whose members are excluded from the result.
// limit - maximum number of ids to return. Zero yields an empty Array and
//         leaves the vector untouched; previously one id was pushed before
//         the limit was checked.
Array RandomSet::subtract(RandomSet &other, size_t limit) {
  Array diff;
  if(limit == 0) {
    return diff;
  }

  const int total = (int)this->elements.size();
  const int upper_bound = total - 1;

  for(int i = 0; i < total; i++) {
    // Swap slot i with a random slot in [i + 1, upper_bound] (clamped on the
    // final slot), then examine whatever id landed in slot i.
    boost::random::uniform_int_distribution<> dist(std::min(i + 1, upper_bound), upper_bound);
    const int swapIndex = dist(rng);

    const int tmp = this->elements[i];
    this->elements[i] = this->elements[swapIndex];
    this->elements[swapIndex] = tmp;

    const int element = this->elements[i];
    if(other.element_set.find(element) != other.element_set.end()) {
      continue;  // present in the other set
    }

    diff.push(element);
    if(diff.size() >= limit) {
      break;
    }
  }

  return diff;
}
|
94
|
+
|
95
|
+
// Returns the ids as a Ruby Array, in the vector's current order.
Array RandomSet::to_a() {
  Array ids;

  for(size_t i = 0; i < this->elements.size(); ++i) {
    ids.push(this->elements[i]);
  }

  return ids;
}
|
105
|
+
|
106
|
+
// Number of stored ids, taken from the ordered vector.
size_t RandomSet::size() {
  const size_t stored = this->elements.size();
  return stored;
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#ifndef RANDOM_SET_H
|
2
|
+
#define RANDOM_SET_H
|
3
|
+
|
4
|
+
#include "rice/Object.hpp"
|
5
|
+
#include "rice/Array.hpp"
|
6
|
+
using namespace Rice;
|
7
|
+
|
8
|
+
#include <boost/random/mersenne_twister.hpp>
|
9
|
+
#include <boost/random/uniform_int_distribution.hpp>
|
10
|
+
#include <boost/unordered_set.hpp>
|
11
|
+
|
12
|
+
#include <vector>
|
13
|
+
#include <sys/time.h>
|
14
|
+
|
15
|
+
class RandomSet {
|
16
|
+
public:
|
17
|
+
RandomSet();
|
18
|
+
void add(int element, double priority = 0.0);
|
19
|
+
void remove(int element);
|
20
|
+
Array sample(int limit);
|
21
|
+
bool includes(int element);
|
22
|
+
Array subtract(RandomSet &other, size_t limit);
|
23
|
+
Array to_a();
|
24
|
+
size_t size();
|
25
|
+
boost::unordered_set<int> element_set;
|
26
|
+
protected:
|
27
|
+
std::vector<int> elements;
|
28
|
+
boost::random::mt19937 rng;
|
29
|
+
|
30
|
+
std::vector<int>::iterator iterator_to(int element);
|
31
|
+
};
|
32
|
+
|
33
|
+
#endif
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DiffSet
  # Mixin that presents a set's flat `subtract` / `sample` results as pairs.
  #
  # On inclusion the host's existing `subtract` and `sample` are aliased to
  # `_c_subtract` / `_c_sample`, and both are redefined to request twice the
  # limit and group the result two at a time. The host must therefore already
  # define both methods when the module is included.
  module Pairwise
    def self.included(base)
      base.class_eval do
        alias_method :_c_subtract, :subtract
        alias_method :_c_sample, :sample

        # Up to `limit` pairs from this set that are absent from `set`.
        def subtract(set, limit)
          _in_pairs(_c_subtract(set, 2 * limit))
        end

        # Up to `limit` pairs sampled from this set.
        def sample(limit)
          _in_pairs(_c_sample(2 * limit))
        end
      end
    end

    protected

    # Groups a flat list two at a time, dropping a trailing unpaired element.
    def _in_pairs(list)
      list.each_slice(2).select { |pair| pair.length == 2 }
    end
  end
end
|
data/lib/diff_set.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
  describe PairwisePrioritySet do
    it_behaves_like 'a set'

    # ids 1..5 receive priorities 5..1.
    let(:set) do
      prioritized = PairwisePrioritySet.new
      (1..5).each { |id| prioritized.add id, 6 - id }
      prioritized
    end

    let(:other_set) do
      RandomSet.new.tap do |random_set|
        (1..3).each { |id| random_set.add id }
      end
    end

    it 'should sample elements in order' do
      expect(set.sample(2)).to eq([[1, 2], [3, 4]])
      expect(set.sample(3).length).to eq(2)
    end

    it 'should not include removed elements in subtractions' do
      set.add 6, 0
      expect(set.subtract(other_set, 5).flatten).to eq([4, 5])
      set.remove 5
      expect(set.subtract(other_set, 5).flatten).to eq([4, 6])
    end

    it 'should subtract another set' do
      expect(set.subtract(other_set, 5).length).to eq(1)
      expect(set.subtract(other_set, 5).first).to eq([4, 5])
    end

    it 'should always return pairs' do
      (6..8).each { |id| set.add id, rand }
      set.subtract(other_set, 3).each do |pair|
        expect(pair.length).to eq(2)
      end
    end

    it 'should update the priority' do
      expect(set.to_a.first).to eq(1)
      expect(set.to_h[1]).to be_within(0.1).of(5)
      set.add 1, 0
      expect(set.to_a.first).to eq(2)
      expect(set.to_a.last).to eq(1)
      expect(set.to_h[1]).to be_within(0.1).of(0)
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
  describe PairwiseRandomSet do
    it_behaves_like 'a set'

    # Builds a PairwiseRandomSet holding the ids 1..elements.
    def create_set(elements)
      pairwise = PairwiseRandomSet.new
      (1..elements).each { |id| pairwise.add id }
      pairwise
    end

    let(:set) { create_set(5) }
    let(:other_set) { create_set(3) }

    it 'should sample pairs of elements' do
      expect(set.sample(2).map(&:length)).to eq([2, 2])
      expect(set.sample(3).length).to eq(2)
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      expect(set.subtract(other_set, 5).flatten).not_to include(5)
    end

    it 'should subtract another set' do
      expect(set.subtract(other_set, 5).length).to eq(1)
      expect(set.subtract(other_set, 5).first).to match_array([4, 5])
    end

    it 'should always return pairs' do
      (6..8).each { |id| set.add id }
      set.subtract(other_set, 3).each do |pair|
        expect(pair.length).to eq(2)
      end
    end

    it 'should mutate the order of the elements when sampling' do
      set_before = set.to_a
      set.sample 5
      expect(set_before).to match_array(set.to_a)
      expect(set_before).not_to eq(set.to_a)
    end

    it 'should mutate the order of the elements on a subtraction' do
      set_before = set.to_a
      set.subtract other_set, 5
      expect(set_before).to match_array(set.to_a)
      expect(set_before).not_to eq(set.to_a)
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
  describe PrioritySet do
    it_behaves_like 'a set'

    # ids 1..5 receive priorities 5..1.
    let(:set) do
      prioritized = PrioritySet.new
      (1..5).each { |id| prioritized.add id, 6 - id }
      prioritized
    end

    let(:other_set) do
      RandomSet.new.tap do |random_set|
        (1..3).each { |id| random_set.add id }
      end
    end

    it 'should sample elements in order' do
      expect(set.sample(5)).to eq((1..5).to_a)
    end

    it 'should subtract another set' do
      expect(set.subtract(other_set, 5)).to eq([4, 5])
      expect(set.subtract(other_set, 1).first).to eq(4)
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      expect(set.subtract(other_set, 5)).to eq([4])
    end

    it 'should update the priority' do
      expect(set.to_a.first).to eq(1)
      expect(set.to_h[1]).to be_within(0.1).of(5)
      set.add 1, 0
      expect(set.to_a.first).to eq(2)
      expect(set.to_a.last).to eq(1)
      expect(set.to_h[1]).to be_within(0.1).of(0)
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
  describe RandomSet do
    it_behaves_like 'a set'

    # Builds a RandomSet holding the ids 1..elements.
    def create_set(elements)
      random_set = RandomSet.new
      (1..elements).each { |id| random_set.add id }
      random_set
    end

    let(:set) { create_set(5) }
    let(:other_set) { create_set(3) }

    it 'should subtract another set' do
      expect(set.subtract(other_set, 5)).to match_array([4, 5])
      expect([4, 5]).to include(set.subtract(other_set, 1).first)
    end

    it 'should not include removed elements in subtractions' do
      set.remove 5
      expect(set.subtract(other_set, 5)).to eq([4])
    end

    it 'should mutate the order of the elements when sampling' do
      set_before = set.to_a
      set.sample 5
      expect(set_before).to match_array(set.to_a)
      expect(set_before).not_to eq(set.to_a)
    end

    it 'should mutate the order of the elements on a subtraction' do
      set_before = set.to_a
      set.subtract other_set, 5
      expect(set_before).to match_array(set.to_a)
      expect(set_before).not_to eq(set.to_a)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Project root: the parent of the directory containing this file.
root = File.expand_path('..', File.dirname(__FILE__))

# Put lib/ and ext/ on the load path so `require 'diff_set'` resolves.
%w(lib ext).each do |name|
  dir = File.join(root, name)
  $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include?(dir)
end

require 'pry'
require 'diff_set'

# Load support files relative to the project root instead of the current
# working directory, so the shared examples are found wherever rspec is
# started from.
Dir[File.join(root, 'spec', 'support', '**', '*.rb')].sort.each { |f| require f }

RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus
  config.order = 'random'
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Behaviour shared by every set class. The including spec must define `set`
# holding the ids 1..5.
shared_examples_for 'a set' do
  it 'should convert to an Array' do
    expect(set.to_a).to match_array((1..5).to_a)
  end

  it 'should add elements' do
    set.add 100
    expect(set.to_a).to include(100)
  end

  it 'should remove elements' do
    set.remove 1
    expect(set.to_a).not_to include(1)
  end

  it 'should sample elements' do
    expect(set.sample(2).length).to eq(2)
  end

  it 'should not include removed elements in samples' do
    set.remove 5
    expect(set.sample(5)).not_to include(5)
  end

  it 'should know how many elements it contains' do
    expect { set.add 100 }.to change { set.size }.from(5).to(6)
  end

  it 'should know if it contains an element' do
    expect(set).not_to include(100)
    set.add 100
    expect(set).to include(100)
  end
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: diff_set
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Parrish
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.9.2
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.9.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rice
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.6'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.6'
|
97
|
+
description: ''
|
98
|
+
email:
|
99
|
+
- michael@zooniverse.org
|
100
|
+
executables: []
|
101
|
+
extensions:
|
102
|
+
- ext/diff_set/extconf.rb
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- Gemfile
|
108
|
+
- LICENSE.txt
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- diff_set.gemspec
|
112
|
+
- ext/diff_set/diff_set_ext.cpp
|
113
|
+
- ext/diff_set/extconf.rb
|
114
|
+
- ext/diff_set/priority_set.cpp
|
115
|
+
- ext/diff_set/priority_set.h
|
116
|
+
- ext/diff_set/random_set.cpp
|
117
|
+
- ext/diff_set/random_set.h
|
118
|
+
- lib/diff_set.rb
|
119
|
+
- lib/diff_set/pairwise.rb
|
120
|
+
- lib/diff_set/pairwise_priority_set.rb
|
121
|
+
- lib/diff_set/pairwise_random_set.rb
|
122
|
+
- lib/diff_set/version.rb
|
123
|
+
- spec/pairwise_priority_set_spec.rb
|
124
|
+
- spec/pairwise_random_set_spec.rb
|
125
|
+
- spec/priority_set_spec.rb
|
126
|
+
- spec/random_set_spec.rb
|
127
|
+
- spec/spec_helper.rb
|
128
|
+
- spec/support/shared_examples_for_set.rb
|
129
|
+
homepage: https://github.com/parrish/diff_set
|
130
|
+
licenses:
|
131
|
+
- MIT
|
132
|
+
metadata: {}
|
133
|
+
post_install_message:
|
134
|
+
rdoc_options: []
|
135
|
+
require_paths:
|
136
|
+
- lib
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: '0'
|
147
|
+
requirements: []
|
148
|
+
rubyforge_project:
|
149
|
+
rubygems_version: 2.2.2
|
150
|
+
signing_key:
|
151
|
+
specification_version: 4
|
152
|
+
summary: DiffSet contains a collection of data structures optimized to perform partial
|
153
|
+
set subtractions
|
154
|
+
test_files:
|
155
|
+
- spec/pairwise_priority_set_spec.rb
|
156
|
+
- spec/pairwise_random_set_spec.rb
|
157
|
+
- spec/priority_set_spec.rb
|
158
|
+
- spec/random_set_spec.rb
|
159
|
+
- spec/spec_helper.rb
|
160
|
+
- spec/support/shared_examples_for_set.rb
|