diff_set 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/.rspec +2 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +46 -0
- data/Rakefile +20 -0
- data/diff_set.gemspec +28 -0
- data/ext/diff_set/diff_set_ext.cpp +39 -0
- data/ext/diff_set/extconf.rb +4 -0
- data/ext/diff_set/priority_set.cpp +99 -0
- data/ext/diff_set/priority_set.h +59 -0
- data/ext/diff_set/random_set.cpp +108 -0
- data/ext/diff_set/random_set.h +33 -0
- data/lib/diff_set/pairwise.rb +23 -0
- data/lib/diff_set/pairwise_priority_set.rb +5 -0
- data/lib/diff_set/pairwise_random_set.rb +5 -0
- data/lib/diff_set/version.rb +3 -0
- data/lib/diff_set.rb +10 -0
- data/spec/pairwise_priority_set_spec.rb +53 -0
- data/spec/pairwise_random_set_spec.rb +50 -0
- data/spec/priority_set_spec.rb +45 -0
- data/spec/random_set_spec.rb +40 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/shared_examples_for_set.rb +34 -0
- metadata +160 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: deaf2800e3064f34e2e8b253ddec8cae53d29951
|
4
|
+
data.tar.gz: d61484cba1501f4f453996829e2347698b28b77a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 74d052ee5da4650f47ab40a1d491cc14678597b76a4110368daf09b85680de9cf075077a7d16c86e46fe359d0c14797b8f2ff2e8313c6d8b0c2429384992f360
|
7
|
+
data.tar.gz: 80e99fa750b79cae19630c94bb337425b62a2bd2b3f2452a0b7359ce2db5bb0c96574b23141178e079c2eb3b3995815b4012aa23ad06b0ffdde96a71828fa6f2
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
.DS_Store
|
19
|
+
.rvmrc
|
20
|
+
.ruby-version
|
21
|
+
*.o
|
22
|
+
Makefile
|
23
|
+
*.bundle
|
24
|
+
*.so
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Michael Parrish
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# DiffSet
|
2
|
+
|
3
|
+
DiffSet contains a collection of data structures optimized to perform partial set subtractions.
|
4
|
+
|
5
|
+
- `DiffSet::RandomSet` Produces a randomized set difference
|
6
|
+
|
7
|
+
- `DiffSet::PrioritySet` Produces an ordered set difference
|
8
|
+
|
9
|
+
- `DiffSet::PairwiseRandomSet` Presents a random set difference as a list of pairs
|
10
|
+
|
11
|
+
- `DiffSet::PairwisePrioritySet` Presents an ordered set difference as a list of pairs
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
1. Install [Boost](http://www.boost.org/):
|
16
|
+
|
17
|
+
- OS X: `brew update && brew install boost`
|
18
|
+
|
19
|
+
- Ubuntu: `sudo apt-get update && sudo apt-get install libboost-all-dev`
|
20
|
+
|
21
|
+
2. Add this line to your application's Gemfile: `gem 'diff_set'`
|
22
|
+
|
23
|
+
3. And then execute: `bundle`
|
24
|
+
|
25
|
+
To install rice, **Ruby must be compiled with shared libraries enabled**:
|
26
|
+
|
27
|
+
- rvm: `rvm reinstall [version] -- --enable-shared`
|
28
|
+
|
29
|
+
- rbenv: `CONFIGURE_OPTS="--enable-shared" rbenv install [version]`
|
30
|
+
|
31
|
+
|
32
|
+
## Usage
|
33
|
+
|
34
|
+
The API is pretty straightforward, and [the specs](https://github.com/parrish/diff_set/tree/master/spec) have examples.
|
35
|
+
|
36
|
+
## Testing
|
37
|
+
|
38
|
+
Run the specs with `rake`
|
39
|
+
|
40
|
+
## Contributing
|
41
|
+
|
42
|
+
1. Fork it ( http://github.com/parrish/diff_set/fork )
|
43
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
44
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
45
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
46
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
|
6
|
+
GEMSPEC = Gem::Specification.load 'diff_set.gemspec'
|
7
|
+
|
8
|
+
Gem::PackageTask.new(GEMSPEC) do |pkg|
|
9
|
+
pkg.need_zip = true
|
10
|
+
pkg.need_tar = true
|
11
|
+
end
|
12
|
+
|
13
|
+
Rake::ExtensionTask.new('diff_set_ext', GEMSPEC) do |ext|
|
14
|
+
ext.ext_dir = 'ext/diff_set'
|
15
|
+
ext.lib_dir = 'lib/diff_set'
|
16
|
+
ext.source_pattern = '*.{h,cpp}'
|
17
|
+
end
|
18
|
+
|
19
|
+
RSpec::Core::RakeTask.new :spec
|
20
|
+
task default: [:compile, :spec]
|
data/diff_set.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'diff_set/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'diff_set'
|
8
|
+
spec.version = DiffSet::VERSION
|
9
|
+
spec.authors = ['Michael Parrish']
|
10
|
+
spec.email = ['michael@zooniverse.org']
|
11
|
+
spec.summary = 'DiffSet contains a collection of data structures optimized to perform partial set subtractions'
|
12
|
+
spec.description = ''
|
13
|
+
spec.homepage = 'https://github.com/parrish/diff_set'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ['lib']
|
20
|
+
spec.extensions = ['ext/diff_set/extconf.rb']
|
21
|
+
|
22
|
+
spec.add_development_dependency 'bundler', '~> 1.5'
|
23
|
+
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'rake-compiler', '~> 0.9.2'
|
25
|
+
spec.add_development_dependency 'rspec'
|
26
|
+
spec.add_development_dependency 'pry'
|
27
|
+
spec.add_runtime_dependency 'rice', '~> 1.6'
|
28
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#include "rice/Class.hpp"
|
2
|
+
#include "rice/Module.hpp"
|
3
|
+
#include "rice/ruby_try_catch.hpp"
|
4
|
+
#include "rice/Data_Type.hpp"
|
5
|
+
#include "rice/Constructor.hpp"
|
6
|
+
using namespace Rice;
|
7
|
+
|
8
|
+
#include "random_set.h"
|
9
|
+
#include "priority_set.h"
|
10
|
+
|
11
|
+
extern "C"
|
12
|
+
void Init_diff_set_ext() {
|
13
|
+
RUBY_TRY
|
14
|
+
{
|
15
|
+
Module rb_mDiffSet = define_module("DiffSet");
|
16
|
+
|
17
|
+
Data_Type<RandomSet> rb_cRandomSet = define_class_under<RandomSet>(rb_mDiffSet, "RandomSet")
|
18
|
+
.define_constructor(Constructor<RandomSet>())
|
19
|
+
.define_method("add", &RandomSet::add, (Arg("id"), Arg("priority") = 0.0))
|
20
|
+
.define_method("remove", &RandomSet::remove)
|
21
|
+
.define_method("sample", &RandomSet::sample)
|
22
|
+
.define_method("subtract", &RandomSet::subtract)
|
23
|
+
.define_method("include?", &RandomSet::includes)
|
24
|
+
.define_method("to_a", &RandomSet::to_a)
|
25
|
+
.define_method("size", &RandomSet::size);
|
26
|
+
|
27
|
+
Data_Type<PrioritySet> rb_cPrioritySet = define_class_under<PrioritySet>(rb_mDiffSet, "PrioritySet")
|
28
|
+
.define_constructor(Constructor<PrioritySet>())
|
29
|
+
.define_method("add", &PrioritySet::add, (Arg("id"), Arg("priority") = 0.0))
|
30
|
+
.define_method("remove", &PrioritySet::remove)
|
31
|
+
.define_method("sample", &PrioritySet::sample)
|
32
|
+
.define_method("subtract", &PrioritySet::subtract)
|
33
|
+
.define_method("include?", &PrioritySet::includes)
|
34
|
+
.define_method("to_a", &PrioritySet::to_a)
|
35
|
+
.define_method("to_h", &PrioritySet::to_h)
|
36
|
+
.define_method("size", &PrioritySet::size);
|
37
|
+
}
|
38
|
+
RUBY_CATCH
|
39
|
+
}
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#include "priority_set.h"
|
2
|
+
|
3
|
+
PrioritySet::PrioritySet() {
|
4
|
+
timeval time;
|
5
|
+
gettimeofday(&time, NULL);
|
6
|
+
long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
|
7
|
+
this->rng.seed((uint)millis);
|
8
|
+
}
|
9
|
+
|
10
|
+
void PrioritySet::add(int id, double priority) {
|
11
|
+
if(includes(id)) {
|
12
|
+
element_handle handle = this->element_handles[id];
|
13
|
+
(*handle).priority = priority;
|
14
|
+
this->heap.update(handle);
|
15
|
+
return;
|
16
|
+
}
|
17
|
+
|
18
|
+
static boost::uniform_01<boost::random::mt19937> dist(this->rng);
|
19
|
+
element_handle handle = this->heap.push(element(id, priority, dist()));
|
20
|
+
this->element_handles.insert(std::make_pair<int, element_handle>(id, handle));
|
21
|
+
this->element_set.insert(id);
|
22
|
+
}
|
23
|
+
|
24
|
+
void PrioritySet::remove(int id) {
|
25
|
+
boost::unordered_set<int>::iterator set_it = this->element_set.find(id);
|
26
|
+
|
27
|
+
if(set_it != this->element_set.end()) {
|
28
|
+
this->element_set.erase(set_it);
|
29
|
+
element_handle handle = this->element_handles[id];
|
30
|
+
(*handle).priority = std::numeric_limits<int>::min();
|
31
|
+
(*handle).enabled = false;
|
32
|
+
this->heap.decrease(handle);
|
33
|
+
}
|
34
|
+
}
|
35
|
+
|
36
|
+
// Returns up to `limit` ids of enabled elements, visited in the heap's
// ordered-iteration order.
//
// limit - maximum number of ids to return. Zero or a negative value yields an
//         empty Array (previously 0 returned one element and a negative value
//         returned every element, unlike RandomSet::sample).
Array PrioritySet::sample(int limit) {
  Array sampled;
  if(limit <= 0) {
    return sampled;
  }

  const size_t wanted = (size_t)limit;
  fibonacci_heap::ordered_iterator it;

  for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
    // Skip nodes that were disabled by remove() but are still in the heap.
    if(!it->enabled) {
      continue;
    }

    sampled.push(it->id);
    if(sampled.size() >= wanted) {
      break;
    }
  }

  return sampled;
}
|
51
|
+
|
52
|
+
bool PrioritySet::includes(int id) {
|
53
|
+
boost::unordered_set<int>::const_iterator it;
|
54
|
+
it = this->element_set.find(id);
|
55
|
+
return it != this->element_set.end();
|
56
|
+
}
|
57
|
+
|
58
|
+
// Returns up to `limit` ids that are enabled members of this set and are not
// present in `other`, visited in the heap's ordered-iteration order.
//
// other - set whose members are excluded from the result.
// limit - maximum number of ids to return; 0 yields an empty Array
//         (previously one element was returned before the limit was checked).
Array PrioritySet::subtract(RandomSet &other, size_t limit) {
  Array diff;
  if(limit == 0) {
    return diff;
  }

  fibonacci_heap::ordered_iterator it;

  for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
    // Keep only live elements that `other` does not contain.
    if(it->enabled && other.element_set.find(it->id) == other.element_set.end()) {
      diff.push(it->id);
      if(diff.size() >= limit) {
        break;
      }
    }
  }

  return diff;
}
|
74
|
+
|
75
|
+
Array PrioritySet::to_a() {
|
76
|
+
Array array;
|
77
|
+
fibonacci_heap::ordered_iterator it;
|
78
|
+
for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
|
79
|
+
if(it->enabled) {
|
80
|
+
array.push(it->id);
|
81
|
+
}
|
82
|
+
}
|
83
|
+
return array;
|
84
|
+
}
|
85
|
+
|
86
|
+
Hash PrioritySet::to_h() {
|
87
|
+
Hash hash;
|
88
|
+
fibonacci_heap::ordered_iterator it;
|
89
|
+
for(it = this->heap.ordered_begin(); it != this->heap.ordered_end(); it++) {
|
90
|
+
if(it->enabled) {
|
91
|
+
hash[it->id] = it->priority;
|
92
|
+
}
|
93
|
+
}
|
94
|
+
return hash;
|
95
|
+
}
|
96
|
+
|
97
|
+
size_t PrioritySet::size() {
|
98
|
+
return this->element_set.size();
|
99
|
+
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#ifndef PRIORITY_SET_H
|
2
|
+
#define PRIORITY_SET_H
|
3
|
+
|
4
|
+
#include "rice/Object.hpp"
|
5
|
+
#include "rice/Array.hpp"
|
6
|
+
#include "rice/Hash.hpp"
|
7
|
+
using namespace Rice;
|
8
|
+
|
9
|
+
#include <boost/random.hpp>
|
10
|
+
#include <boost/heap/fibonacci_heap.hpp>
|
11
|
+
#include <boost/unordered_set.hpp>
|
12
|
+
#include <boost/unordered_map.hpp>
|
13
|
+
|
14
|
+
#include <sys/time.h>
|
15
|
+
#include <limits>
|
16
|
+
|
17
|
+
#include "random_set.h"
|
18
|
+
|
19
|
+
class PrioritySet {
|
20
|
+
public:
|
21
|
+
struct element {
|
22
|
+
int id;
|
23
|
+
double priority;
|
24
|
+
double random;
|
25
|
+
bool enabled;
|
26
|
+
|
27
|
+
element(int id, double priority, double random) {
|
28
|
+
enabled = true;
|
29
|
+
this->id = id;
|
30
|
+
this->priority = priority;
|
31
|
+
this->random = random;
|
32
|
+
}
|
33
|
+
};
|
34
|
+
|
35
|
+
struct comparator {
|
36
|
+
bool operator()(const element &a, const element &b) const {
|
37
|
+
return (a.priority < b.priority) || (a.priority == b.priority && a.random < b.random);
|
38
|
+
}
|
39
|
+
};
|
40
|
+
|
41
|
+
PrioritySet();
|
42
|
+
void add(int id, double priority = 0.0);
|
43
|
+
void remove(int id);
|
44
|
+
Array sample(int limit);
|
45
|
+
bool includes(int id);
|
46
|
+
Array subtract(RandomSet &other, size_t limit);
|
47
|
+
Array to_a();
|
48
|
+
Hash to_h();
|
49
|
+
size_t size();
|
50
|
+
protected:
|
51
|
+
typedef boost::heap::fibonacci_heap<element, boost::heap::compare<comparator> > fibonacci_heap;
|
52
|
+
typedef fibonacci_heap::handle_type element_handle;
|
53
|
+
fibonacci_heap heap;
|
54
|
+
boost::unordered_map<int, element_handle > element_handles;
|
55
|
+
boost::unordered_set<int> element_set;
|
56
|
+
boost::random::mt19937 rng;
|
57
|
+
};
|
58
|
+
|
59
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include "random_set.h"
|
2
|
+
|
3
|
+
RandomSet::RandomSet() {
|
4
|
+
timeval time;
|
5
|
+
gettimeofday(&time, NULL);
|
6
|
+
long millis = (time.tv_sec * 1000.0) + (time.tv_usec / 1000.0);
|
7
|
+
this->rng.seed((uint)millis);
|
8
|
+
}
|
9
|
+
|
10
|
+
// Adds `element` to the set. Adding an id that is already present is a no-op.
//
// element  - integer identifier to add.
// priority - not used by this function; the parameter mirrors
//            PrioritySet::add so both classes share a signature.
void RandomSet::add(int element, double priority) {
  // insert() reports whether the id was newly added. Only then is it appended
  // to the vector, so the vector never holds duplicates and stays in step
  // with element_set (size(), to_a() and remove() all depend on that).
  if(this->element_set.insert(element).second) {
    this->elements.push_back(element);
  }
}
|
14
|
+
|
15
|
+
void RandomSet::remove(int element) {
|
16
|
+
boost::unordered_set<int>::iterator set_it = this->element_set.find(element);
|
17
|
+
|
18
|
+
if(set_it != this->element_set.end()) {
|
19
|
+
this->element_set.erase(set_it);
|
20
|
+
|
21
|
+
std::vector<int>::iterator it = iterator_to(element);
|
22
|
+
if(it != this->elements.end()) {
|
23
|
+
this->elements.erase(it);
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
Array RandomSet::sample(int limit) {
|
29
|
+
Array sampled;
|
30
|
+
int swapIndex;
|
31
|
+
int tmp;
|
32
|
+
int upper_bound = (int)this->elements.size() - 1;
|
33
|
+
boost::random::uniform_int_distribution<> dist;
|
34
|
+
|
35
|
+
for(int i = 0; i < limit && i < (int)this->elements.size(); i++) {
|
36
|
+
dist = boost::random::uniform_int_distribution<>(std::min(i + 1, upper_bound), upper_bound);
|
37
|
+
swapIndex = dist(rng);
|
38
|
+
tmp = this->elements[i];
|
39
|
+
this->elements[i] = this->elements[swapIndex];
|
40
|
+
this->elements[swapIndex] = tmp;
|
41
|
+
sampled.push(this->elements[i]);
|
42
|
+
}
|
43
|
+
|
44
|
+
return sampled;
|
45
|
+
}
|
46
|
+
|
47
|
+
bool RandomSet::includes(int element) {
|
48
|
+
boost::unordered_set<int>::const_iterator it;
|
49
|
+
it = this->element_set.find(element);
|
50
|
+
return it != this->element_set.end();
|
51
|
+
}
|
52
|
+
|
53
|
+
std::vector<int>::iterator RandomSet::iterator_to(int element) {
|
54
|
+
std::vector<int>::iterator it;
|
55
|
+
for(it = this->elements.begin(); it != this->elements.end(); it++) {
|
56
|
+
if(element == *it) {
|
57
|
+
return it;
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
return this->elements.end();
|
62
|
+
}
|
63
|
+
|
64
|
+
// Returns up to `limit` ids, in random order, that are in this set but not in
// `other`. The element vector is shuffled in place as it is scanned, so the
// call changes the order later reported by to_a().
//
// other - set whose members are excluded from the result.
// limit - maximum number of ids to return; 0 yields an empty Array without
//         touching the element order (previously one element was returned
//         before the limit was checked).
Array RandomSet::subtract(RandomSet &other, size_t limit) {
  Array diff;
  if(limit == 0) {
    return diff;
  }

  const int count = (int)this->elements.size();
  const int upper_bound = count - 1;

  for(int i = 0; i < count; i++) {
    // The swap partner is drawn from [i + 1, upper_bound] (clamped for the
    // last slot), so slot i is always exchanged with a later slot.
    // NOTE(review): this is not a uniform Fisher-Yates shuffle (which would
    // draw from [i, upper_bound]); it looks intentional so that a call always
    // reorders the elements - confirm before changing.
    boost::random::uniform_int_distribution<> dist(std::min(i + 1, upper_bound), upper_bound);
    const int swapIndex = dist(rng);

    const int candidate = this->elements[swapIndex];
    this->elements[swapIndex] = this->elements[i];
    this->elements[i] = candidate;

    if(other.element_set.find(candidate) == other.element_set.end()) {
      diff.push(candidate);
      if(diff.size() >= limit) {
        break;
      }
    }
  }

  return diff;
}
|
94
|
+
|
95
|
+
Array RandomSet::to_a() {
|
96
|
+
Array array;
|
97
|
+
|
98
|
+
std::vector<int>::iterator it;
|
99
|
+
for(it = this->elements.begin(); it != this->elements.end(); it++) {
|
100
|
+
array.push(*it);
|
101
|
+
}
|
102
|
+
|
103
|
+
return array;
|
104
|
+
}
|
105
|
+
|
106
|
+
size_t RandomSet::size() {
|
107
|
+
return this->elements.size();
|
108
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#ifndef RANDOM_SET_H
|
2
|
+
#define RANDOM_SET_H
|
3
|
+
|
4
|
+
#include "rice/Object.hpp"
|
5
|
+
#include "rice/Array.hpp"
|
6
|
+
using namespace Rice;
|
7
|
+
|
8
|
+
#include <boost/random/mersenne_twister.hpp>
|
9
|
+
#include <boost/random/uniform_int_distribution.hpp>
|
10
|
+
#include <boost/unordered_set.hpp>
|
11
|
+
|
12
|
+
#include <vector>
|
13
|
+
#include <sys/time.h>
|
14
|
+
|
15
|
+
class RandomSet {
|
16
|
+
public:
|
17
|
+
RandomSet();
|
18
|
+
void add(int element, double priority = 0.0);
|
19
|
+
void remove(int element);
|
20
|
+
Array sample(int limit);
|
21
|
+
bool includes(int element);
|
22
|
+
Array subtract(RandomSet &other, size_t limit);
|
23
|
+
Array to_a();
|
24
|
+
size_t size();
|
25
|
+
boost::unordered_set<int> element_set;
|
26
|
+
protected:
|
27
|
+
std::vector<int> elements;
|
28
|
+
boost::random::mt19937 rng;
|
29
|
+
|
30
|
+
std::vector<int>::iterator iterator_to(int element);
|
31
|
+
};
|
32
|
+
|
33
|
+
#endif
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DiffSet
  # Mixin that makes a set return its results as pairs.
  #
  # When included, the including class's existing +subtract+ and +sample+
  # methods are kept under the names +_c_subtract+ and +_c_sample+, and are
  # replaced by wrappers that request twice as many elements and group them
  # into two-element arrays. A trailing unpaired element is dropped.
  module Pairwise
    # Installs the pairwise wrappers on +klass+.
    #
    # The hook runs on every +include+, even when the module is already in
    # the ancestor chain. Aliasing a second time would alias the wrapper to
    # itself and make +subtract+/+sample+ recurse forever, so the hook is a
    # no-op once the aliases exist.
    def self.included(klass)
      return if klass.method_defined?(:_c_subtract) || klass.method_defined?(:_c_sample)

      klass.class_eval do
        alias_method :_c_subtract, :subtract
        alias_method :_c_sample, :sample

        # Returns up to +limit+ pairs from the wrapped +subtract+.
        def subtract(set, limit)
          _in_pairs(_c_subtract(set, 2 * limit))
        end

        # Returns up to +limit+ pairs from the wrapped +sample+.
        def sample(limit)
          _in_pairs(_c_sample(2 * limit))
        end
      end
    end

    protected

    # Groups +list+ into consecutive two-element arrays, discarding a
    # leftover single element.
    def _in_pairs(list)
      list.each_slice(2).select { |pair| pair.length == 2 }
    end
  end
end
|
data/lib/diff_set.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
|
4
|
+
describe PairwisePrioritySet do
|
5
|
+
it_behaves_like 'a set'
|
6
|
+
|
7
|
+
let(:set) do
|
8
|
+
ids = (1..5).to_a
|
9
|
+
priorities = ids.reverse
|
10
|
+
|
11
|
+
PairwisePrioritySet.new.tap do |priority_set|
|
12
|
+
ids.zip(priorities).each{ |id, priority| priority_set.add id, priority }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:other_set) do
|
17
|
+
RandomSet.new.tap do |random_set|
|
18
|
+
1.upto(3).each{ |i| random_set.add i }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should sample elements in order' do
|
23
|
+
set.sample(2).should == [[1, 2], [3, 4]]
|
24
|
+
set.sample(3).length.should == 2
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should not include removed elements in subtractions' do
|
28
|
+
set.add 6, 0
|
29
|
+
set.subtract(other_set, 5).flatten.should == [4, 5]
|
30
|
+
set.remove 5
|
31
|
+
set.subtract(other_set, 5).flatten.should == [4, 6]
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should subtract another set' do
|
35
|
+
set.subtract(other_set, 5).length.should == 1
|
36
|
+
set.subtract(other_set, 5).first.should == [4, 5]
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should always return pairs' do
|
40
|
+
6.upto(8).each{ |i| set.add i, rand }
|
41
|
+
set.subtract(other_set, 3).each{ |pair| pair.length.should == 2 }
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should update the priority' do
|
45
|
+
set.to_a.first.should == 1
|
46
|
+
set.to_h[1].should be_within(0.1).of(5)
|
47
|
+
set.add 1, 0
|
48
|
+
set.to_a.first.should == 2
|
49
|
+
set.to_a.last.should == 1
|
50
|
+
set.to_h[1].should be_within(0.1).of(0)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
|
4
|
+
describe PairwiseRandomSet do
|
5
|
+
it_behaves_like 'a set'
|
6
|
+
|
7
|
+
def create_set(elements)
|
8
|
+
PairwiseRandomSet.new.tap do |random_set|
|
9
|
+
1.upto(elements).each{ |i| random_set.add i }
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
let(:set){ create_set(5) }
|
14
|
+
let(:other_set){ create_set(3) }
|
15
|
+
|
16
|
+
it 'should sample pairs of elements' do
|
17
|
+
set.sample(2).collect(&:length).should == [2, 2]
|
18
|
+
set.sample(3).length.should == 2
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should not include removed elements in subtractions' do
|
22
|
+
set.remove 5
|
23
|
+
set.subtract(other_set, 5).flatten.should_not include 5
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should subtract another set' do
|
27
|
+
set.subtract(other_set, 5).length.should == 1
|
28
|
+
set.subtract(other_set, 5).first.should =~ [4, 5]
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should always return pairs' do
|
32
|
+
6.upto(8).each{ |i| set.add i }
|
33
|
+
set.subtract(other_set, 3).each{ |pair| pair.length.should == 2 }
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'should mutate the order of the elements when sampling' do
|
37
|
+
set_before = set.to_a
|
38
|
+
set.sample 5
|
39
|
+
set_before.should =~ set.to_a
|
40
|
+
set_before.should_not == set.to_a
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should mutate the order of the elements on a subtraction' do
|
44
|
+
set_before = set.to_a
|
45
|
+
set.subtract other_set, 5
|
46
|
+
set_before.should =~ set.to_a
|
47
|
+
set_before.should_not == set.to_a
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
|
4
|
+
describe PrioritySet do
|
5
|
+
it_behaves_like 'a set'
|
6
|
+
|
7
|
+
let(:set) do
|
8
|
+
ids = (1..5).to_a
|
9
|
+
priorities = ids.reverse
|
10
|
+
|
11
|
+
PrioritySet.new.tap do |priority_set|
|
12
|
+
ids.zip(priorities).each{ |id, priority| priority_set.add id, priority }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
let(:other_set) do
|
17
|
+
RandomSet.new.tap do |random_set|
|
18
|
+
1.upto(3).each{ |i| random_set.add i }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should sample elements in order' do
|
23
|
+
set.sample(5).should == (1..5).to_a
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should subtract another set' do
|
27
|
+
set.subtract(other_set, 5).should == [4, 5]
|
28
|
+
set.subtract(other_set, 1).first.should == 4
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should not include removed elements in subtractions' do
|
32
|
+
set.remove 5
|
33
|
+
set.subtract(other_set, 5).should == [4]
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'should update the priority' do
|
37
|
+
set.to_a.first.should == 1
|
38
|
+
set.to_h[1].should be_within(0.1).of(5)
|
39
|
+
set.add 1, 0
|
40
|
+
set.to_a.first.should == 2
|
41
|
+
set.to_a.last.should == 1
|
42
|
+
set.to_h[1].should be_within(0.1).of(0)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module DiffSet
|
4
|
+
describe RandomSet do
|
5
|
+
it_behaves_like 'a set'
|
6
|
+
|
7
|
+
def create_set(elements)
|
8
|
+
RandomSet.new.tap do |random_set|
|
9
|
+
1.upto(elements).each{ |i| random_set.add i }
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
let(:set){ create_set(5) }
|
14
|
+
let(:other_set){ create_set(3) }
|
15
|
+
|
16
|
+
it 'should subtract another set' do
|
17
|
+
set.subtract(other_set, 5).should =~ [4, 5]
|
18
|
+
[4, 5].should include set.subtract(other_set, 1).first
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should not include removed elements in subtractions' do
|
22
|
+
set.remove 5
|
23
|
+
set.subtract(other_set, 5).should == [4]
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should mutate the order of the elements when sampling' do
|
27
|
+
set_before = set.to_a
|
28
|
+
set.sample 5
|
29
|
+
set_before.should =~ set.to_a
|
30
|
+
set_before.should_not == set.to_a
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'should mutate the order of the elements on a subtraction' do
|
34
|
+
set_before = set.to_a
|
35
|
+
set.subtract other_set, 5
|
36
|
+
set_before.should =~ set.to_a
|
37
|
+
set_before.should_not == set.to_a
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = File.expand_path File.join(File.dirname(__FILE__), '../')
|
2
|
+
%w(lib ext).each do |name|
|
3
|
+
dir = File.join root, name
|
4
|
+
$LOAD_PATH.unshift dir unless $LOAD_PATH.include? dir
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'pry'
|
8
|
+
require 'diff_set'
|
9
|
+
|
10
|
+
Dir["./spec/support/**/*.rb"].sort.each{ |f| require f }
|
11
|
+
|
12
|
+
RSpec.configure do |config|
|
13
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
14
|
+
config.run_all_when_everything_filtered = true
|
15
|
+
config.filter_run :focus
|
16
|
+
config.order = 'random'
|
17
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
shared_examples_for 'a set' do
|
2
|
+
it 'should convert to an Array' do
|
3
|
+
set.to_a.should =~ (1..5).to_a
|
4
|
+
end
|
5
|
+
|
6
|
+
it 'should add elements' do
|
7
|
+
set.add 100
|
8
|
+
set.to_a.should include 100
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'should remove elements' do
|
12
|
+
set.remove 1
|
13
|
+
set.to_a.should_not include 1
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should sample elements' do
|
17
|
+
set.sample(2).length.should == 2
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should not include removed elements in samples' do
|
21
|
+
set.remove 5
|
22
|
+
set.sample(5).should_not include 5
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should know how many elements it contains' do
|
26
|
+
expect{ set.add 100 }.to change{ set.size }.from(5).to 6
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should know if it contains an element' do
|
30
|
+
set.should_not include 100
|
31
|
+
set.add 100
|
32
|
+
set.should include 100
|
33
|
+
end
|
34
|
+
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: diff_set
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Parrish
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.9.2
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.9.2
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pry
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rice
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.6'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.6'
|
97
|
+
description: ''
|
98
|
+
email:
|
99
|
+
- michael@zooniverse.org
|
100
|
+
executables: []
|
101
|
+
extensions:
|
102
|
+
- ext/diff_set/extconf.rb
|
103
|
+
extra_rdoc_files: []
|
104
|
+
files:
|
105
|
+
- ".gitignore"
|
106
|
+
- ".rspec"
|
107
|
+
- Gemfile
|
108
|
+
- LICENSE.txt
|
109
|
+
- README.md
|
110
|
+
- Rakefile
|
111
|
+
- diff_set.gemspec
|
112
|
+
- ext/diff_set/diff_set_ext.cpp
|
113
|
+
- ext/diff_set/extconf.rb
|
114
|
+
- ext/diff_set/priority_set.cpp
|
115
|
+
- ext/diff_set/priority_set.h
|
116
|
+
- ext/diff_set/random_set.cpp
|
117
|
+
- ext/diff_set/random_set.h
|
118
|
+
- lib/diff_set.rb
|
119
|
+
- lib/diff_set/pairwise.rb
|
120
|
+
- lib/diff_set/pairwise_priority_set.rb
|
121
|
+
- lib/diff_set/pairwise_random_set.rb
|
122
|
+
- lib/diff_set/version.rb
|
123
|
+
- spec/pairwise_priority_set_spec.rb
|
124
|
+
- spec/pairwise_random_set_spec.rb
|
125
|
+
- spec/priority_set_spec.rb
|
126
|
+
- spec/random_set_spec.rb
|
127
|
+
- spec/spec_helper.rb
|
128
|
+
- spec/support/shared_examples_for_set.rb
|
129
|
+
homepage: https://github.com/parrish/diff_set
|
130
|
+
licenses:
|
131
|
+
- MIT
|
132
|
+
metadata: {}
|
133
|
+
post_install_message:
|
134
|
+
rdoc_options: []
|
135
|
+
require_paths:
|
136
|
+
- lib
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ">="
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - ">="
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: '0'
|
147
|
+
requirements: []
|
148
|
+
rubyforge_project:
|
149
|
+
rubygems_version: 2.2.2
|
150
|
+
signing_key:
|
151
|
+
specification_version: 4
|
152
|
+
summary: DiffSet contains a collection of data structures optimized to perform partial
|
153
|
+
set subtractions
|
154
|
+
test_files:
|
155
|
+
- spec/pairwise_priority_set_spec.rb
|
156
|
+
- spec/pairwise_random_set_spec.rb
|
157
|
+
- spec/priority_set_spec.rb
|
158
|
+
- spec/random_set_spec.rb
|
159
|
+
- spec/spec_helper.rb
|
160
|
+
- spec/support/shared_examples_for_set.rb
|