datasketches 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +80 -4
- data/ext/datasketches/cpc_wrapper.cpp +29 -11
- data/ext/datasketches/hll_wrapper.cpp +25 -16
- data/ext/datasketches/kll_wrapper.cpp +71 -14
- data/ext/datasketches/theta_wrapper.cpp +93 -3
- data/ext/datasketches/vo_wrapper.cpp +11 -9
- data/lib/datasketches/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d74bd4504918fd8217f9cf7d8c24442b5c196b2d874a042f6a2f30a24ae51db
|
4
|
+
data.tar.gz: 7521194ca0d05808b234c6cb1bb5150874c28f7e9536820f728b33b0aec8c52a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db41c21f18a51bf285e32556c52e06453810448d1d75d11cd1022576a98d5069977d65fe6bab7e5bd6425468531414fdd801a1bde1bfdf82fc05f455d97b80fd
|
7
|
+
data.tar.gz: 892e9f2c4b891194cfc1af8387494e78eb2e703b4a56f7c91e1b4784f0bd2439f35f8e25f92e954be44418d2964e804f892ab687df2acf8e9b64d43e0049cca0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,6 +18,7 @@ Distinct counting
|
|
18
18
|
|
19
19
|
- [CPC sketch](#cpc-sketch)
|
20
20
|
- [HyperLogLog sketch](#hyperloglog-sketch)
|
21
|
+
- [Theta sketch](#theta-sketch)
|
21
22
|
|
22
23
|
Most frequent
|
23
24
|
|
@@ -36,7 +37,7 @@ Sampling
|
|
36
37
|
Create a sketch
|
37
38
|
|
38
39
|
```ruby
|
39
|
-
sketch = DataSketches::CpcSketch.new
|
40
|
+
sketch = DataSketches::CpcSketch.new
|
40
41
|
```
|
41
42
|
|
42
43
|
Add data
|
@@ -65,6 +66,15 @@ Load a sketch
|
|
65
66
|
sketch = DataSketches::CpcSketch.deserialize(data)
|
66
67
|
```
|
67
68
|
|
69
|
+
Get the union
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
u = DataSketches::CpcUnion.new(14)
|
73
|
+
u.update(sketch1)
|
74
|
+
u.update(sketch2)
|
75
|
+
u.result
|
76
|
+
```
|
77
|
+
|
68
78
|
## HyperLogLog Sketch
|
69
79
|
|
70
80
|
Create a sketch
|
@@ -107,7 +117,66 @@ Get the union
|
|
107
117
|
u = DataSketches::HllUnion.new(14)
|
108
118
|
u.update(sketch1)
|
109
119
|
u.update(sketch2)
|
110
|
-
u.
|
120
|
+
u.result
|
121
|
+
```
|
122
|
+
|
123
|
+
## Theta Sketch
|
124
|
+
|
125
|
+
Create a sketch
|
126
|
+
|
127
|
+
```ruby
|
128
|
+
sketch = DataSketches::UpdateThetaSketch.new
|
129
|
+
```
|
130
|
+
|
131
|
+
Add data
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
sketch.update(1)
|
135
|
+
sketch.update(2.0)
|
136
|
+
sketch.update("three")
|
137
|
+
```
|
138
|
+
|
139
|
+
Estimate the count
|
140
|
+
|
141
|
+
```ruby
|
142
|
+
sketch.estimate
|
143
|
+
```
|
144
|
+
|
145
|
+
Save a sketch
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
data = sketch.serialize
|
149
|
+
```
|
150
|
+
|
151
|
+
Load a sketch
|
152
|
+
|
153
|
+
```ruby
|
154
|
+
sketch = DataSketches::UpdateThetaSketch.deserialize(data)
|
155
|
+
```
|
156
|
+
|
157
|
+
Get the union
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
u = DataSketches::ThetaUnion.new
|
161
|
+
u.update(sketch1)
|
162
|
+
u.update(sketch2)
|
163
|
+
u.result
|
164
|
+
```
|
165
|
+
|
166
|
+
Get the intersection
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
i = DataSketches::ThetaIntersection.new
|
170
|
+
i.update(sketch1)
|
171
|
+
i.update(sketch2)
|
172
|
+
i.result
|
173
|
+
```
|
174
|
+
|
175
|
+
Compute A not B
|
176
|
+
|
177
|
+
```ruby
|
178
|
+
d = DataSketches::ThetaANotB.new
|
179
|
+
d.compute(a, b)
|
111
180
|
```
|
112
181
|
|
113
182
|
## Frequent Item Sketch
|
@@ -149,9 +218,9 @@ sketch = DataSketches::FrequentStringsSketch.deserialize(data)
|
|
149
218
|
Create a sketch
|
150
219
|
|
151
220
|
```ruby
|
152
|
-
sketch = DataSketches::KllIntsSketch.new
|
221
|
+
sketch = DataSketches::KllIntsSketch.new
|
153
222
|
# or
|
154
|
-
sketch = DataSketches::KllFloatsSketch.new
|
223
|
+
sketch = DataSketches::KllFloatsSketch.new
|
155
224
|
```
|
156
225
|
|
157
226
|
Add data
|
@@ -167,6 +236,7 @@ Get quantiles
|
|
167
236
|
```ruby
|
168
237
|
sketch.quantile(0.5)
|
169
238
|
sketch.quantiles([0.25, 0.5, 0.75])
|
239
|
+
sketch.quantiles(3)
|
170
240
|
```
|
171
241
|
|
172
242
|
Get the minimum and maximum values from the stream
|
@@ -210,6 +280,12 @@ sketch.update(2.0)
|
|
210
280
|
sketch.update("three")
|
211
281
|
```
|
212
282
|
|
283
|
+
Sample data
|
284
|
+
|
285
|
+
```ruby
|
286
|
+
sketch.samples
|
287
|
+
```
|
288
|
+
|
213
289
|
## Credits
|
214
290
|
|
215
291
|
This library is modeled after the DataSketches [Python API](https://github.com/apache/datasketches-cpp/tree/master/python).
|
@@ -1,20 +1,29 @@
|
|
1
1
|
#include <sstream>
|
2
2
|
|
3
3
|
#include <cpc_sketch.hpp>
|
4
|
+
#include <cpc_union.hpp>
|
4
5
|
|
5
6
|
#include <rice/Constructor.hpp>
|
6
7
|
#include <rice/Module.hpp>
|
7
8
|
|
9
|
+
using datasketches::cpc_sketch;
|
10
|
+
using datasketches::cpc_union;
|
11
|
+
|
12
|
+
using datasketches::CPC_DEFAULT_LG_K;
|
13
|
+
using datasketches::DEFAULT_SEED;
|
14
|
+
|
15
|
+
using Rice::Arg;
|
16
|
+
|
8
17
|
void init_cpc(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_constructor(Rice::Constructor<
|
11
|
-
.define_method("lg_k", &
|
12
|
-
.define_method("empty?", &
|
13
|
-
.define_method("lower_bound", &
|
14
|
-
.define_method("upper_bound", &
|
18
|
+
Rice::define_class_under<cpc_sketch>(m, "CpcSketch")
|
19
|
+
.define_constructor(Rice::Constructor<cpc_sketch, uint8_t, uint64_t>(), (Rice::Arg("lg_k")=CPC_DEFAULT_LG_K, Rice::Arg("seed")=DEFAULT_SEED))
|
20
|
+
.define_method("lg_k", &cpc_sketch::get_lg_k)
|
21
|
+
.define_method("empty?", &cpc_sketch::is_empty)
|
22
|
+
.define_method("lower_bound", &cpc_sketch::get_lower_bound)
|
23
|
+
.define_method("upper_bound", &cpc_sketch::get_upper_bound)
|
15
24
|
.define_method(
|
16
25
|
"update",
|
17
|
-
*[](
|
26
|
+
*[](cpc_sketch& self, Rice::Object datum) {
|
18
27
|
if (FIXNUM_P(datum.value())) {
|
19
28
|
return self.update(from_ruby<int64_t>(datum));
|
20
29
|
} else if (datum.is_a(rb_cNumeric)) {
|
@@ -25,12 +34,12 @@ void init_cpc(Rice::Module& m) {
|
|
25
34
|
})
|
26
35
|
.define_method(
|
27
36
|
"estimate",
|
28
|
-
*[](
|
37
|
+
*[](cpc_sketch& self) {
|
29
38
|
return self.get_estimate();
|
30
39
|
})
|
31
40
|
.define_method(
|
32
41
|
"serialize",
|
33
|
-
*[](
|
42
|
+
*[](cpc_sketch& self) {
|
34
43
|
std::ostringstream oss;
|
35
44
|
self.serialize(oss);
|
36
45
|
return oss.str();
|
@@ -38,13 +47,22 @@ void init_cpc(Rice::Module& m) {
|
|
38
47
|
// TODO change to summary?
|
39
48
|
.define_method(
|
40
49
|
"to_string",
|
41
|
-
*[](
|
50
|
+
*[](cpc_sketch& self) {
|
42
51
|
return self.to_string();
|
43
52
|
})
|
44
53
|
.define_singleton_method(
|
45
54
|
"deserialize",
|
46
55
|
*[](std::string& is) {
|
47
56
|
std::istringstream iss(is);
|
48
|
-
return
|
57
|
+
return cpc_sketch::deserialize(iss);
|
58
|
+
});
|
59
|
+
|
60
|
+
Rice::define_class_under<cpc_union>(m, "CpcUnion")
|
61
|
+
.define_constructor(Rice::Constructor<cpc_union, uint8_t, uint64_t>(), (Rice::Arg("lg_k"), Rice::Arg("seed")=DEFAULT_SEED))
|
62
|
+
.define_method("result", &cpc_union::get_result)
|
63
|
+
.define_method(
|
64
|
+
"update",
|
65
|
+
*[](cpc_union& self, cpc_sketch& sketch) {
|
66
|
+
self.update(sketch);
|
49
67
|
});
|
50
68
|
}
|
@@ -5,15 +5,19 @@
|
|
5
5
|
#include <rice/Constructor.hpp>
|
6
6
|
#include <rice/Module.hpp>
|
7
7
|
|
8
|
+
using datasketches::hll_sketch;
|
9
|
+
using datasketches::hll_union;
|
10
|
+
|
8
11
|
void init_hll(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_constructor(Rice::Constructor<
|
11
|
-
.define_method("lg_config_k", &
|
12
|
-
.define_method("compact?", &
|
13
|
-
.define_method("empty?", &
|
12
|
+
Rice::define_class_under<hll_sketch>(m, "HllSketch")
|
13
|
+
.define_constructor(Rice::Constructor<hll_sketch, int>())
|
14
|
+
.define_method("lg_config_k", &hll_sketch::get_lg_config_k)
|
15
|
+
.define_method("compact?", &hll_sketch::is_compact)
|
16
|
+
.define_method("empty?", &hll_sketch::is_empty)
|
17
|
+
.define_method("composite_estimate", &hll_sketch::get_composite_estimate)
|
14
18
|
.define_method(
|
15
19
|
"update",
|
16
|
-
*[](
|
20
|
+
*[](hll_sketch& self, Rice::Object datum) {
|
17
21
|
if (FIXNUM_P(datum.value())) {
|
18
22
|
return self.update(from_ruby<int64_t>(datum));
|
19
23
|
} else if (datum.is_a(rb_cNumeric)) {
|
@@ -24,19 +28,19 @@ void init_hll(Rice::Module& m) {
|
|
24
28
|
})
|
25
29
|
.define_method(
|
26
30
|
"estimate",
|
27
|
-
*[](
|
31
|
+
*[](hll_sketch& self) {
|
28
32
|
return self.get_estimate();
|
29
33
|
})
|
30
34
|
.define_method(
|
31
35
|
"serialize_compact",
|
32
|
-
*[](
|
36
|
+
*[](hll_sketch& self) {
|
33
37
|
std::ostringstream oss;
|
34
38
|
self.serialize_compact(oss);
|
35
39
|
return oss.str();
|
36
40
|
})
|
37
41
|
.define_method(
|
38
42
|
"serialize_updatable",
|
39
|
-
*[](
|
43
|
+
*[](hll_sketch& self) {
|
40
44
|
std::ostringstream oss;
|
41
45
|
self.serialize_updatable(oss);
|
42
46
|
return oss.str();
|
@@ -44,26 +48,31 @@ void init_hll(Rice::Module& m) {
|
|
44
48
|
// TODO change to summary?
|
45
49
|
.define_method(
|
46
50
|
"to_string",
|
47
|
-
*[](
|
51
|
+
*[](hll_sketch& self) {
|
48
52
|
return self.to_string();
|
49
53
|
})
|
50
54
|
.define_singleton_method(
|
51
55
|
"deserialize",
|
52
56
|
*[](std::string& is) {
|
53
57
|
std::istringstream iss(is);
|
54
|
-
return
|
58
|
+
return hll_sketch::deserialize(iss);
|
55
59
|
});
|
56
60
|
|
57
|
-
Rice::define_class_under<
|
58
|
-
.define_constructor(Rice::Constructor<
|
61
|
+
Rice::define_class_under<hll_union>(m, "HllUnion")
|
62
|
+
.define_constructor(Rice::Constructor<hll_union, int>())
|
59
63
|
.define_method(
|
60
64
|
"update",
|
61
|
-
*[](
|
62
|
-
self.update(
|
65
|
+
*[](hll_union& self, hll_sketch& sketch) {
|
66
|
+
self.update(sketch);
|
63
67
|
})
|
64
68
|
.define_method(
|
65
69
|
"estimate",
|
66
|
-
*[](
|
70
|
+
*[](hll_union& self) {
|
67
71
|
return self.get_estimate();
|
72
|
+
})
|
73
|
+
.define_method(
|
74
|
+
"result",
|
75
|
+
*[](hll_union& self) {
|
76
|
+
return self.get_result();
|
68
77
|
});
|
69
78
|
}
|
@@ -6,12 +6,36 @@
|
|
6
6
|
#include <rice/Constructor.hpp>
|
7
7
|
#include <rice/Module.hpp>
|
8
8
|
|
9
|
+
using datasketches::kll_sketch;
|
10
|
+
|
11
|
+
template<>
|
12
|
+
std::vector<int> from_ruby<std::vector<int>>(Rice::Object x)
|
13
|
+
{
|
14
|
+
auto a = Rice::Array(x);
|
15
|
+
std::vector<int> vec(a.size());
|
16
|
+
for (long i = 0; i < a.size(); i++) {
|
17
|
+
vec[i] = from_ruby<int>(a[i]);
|
18
|
+
}
|
19
|
+
return vec;
|
20
|
+
}
|
21
|
+
|
22
|
+
template<>
|
23
|
+
std::vector<float> from_ruby<std::vector<float>>(Rice::Object x)
|
24
|
+
{
|
25
|
+
auto a = Rice::Array(x);
|
26
|
+
std::vector<float> vec(a.size());
|
27
|
+
for (long i = 0; i < a.size(); i++) {
|
28
|
+
vec[i] = from_ruby<float>(a[i]);
|
29
|
+
}
|
30
|
+
return vec;
|
31
|
+
}
|
32
|
+
|
9
33
|
template<>
|
10
34
|
std::vector<double> from_ruby<std::vector<double>>(Rice::Object x)
|
11
35
|
{
|
12
36
|
auto a = Rice::Array(x);
|
13
37
|
std::vector<double> vec(a.size());
|
14
|
-
for (
|
38
|
+
for (long i = 0; i < a.size(); i++) {
|
15
39
|
vec[i] = from_ruby<double>(a[i]);
|
16
40
|
}
|
17
41
|
return vec;
|
@@ -37,32 +61,65 @@ Rice::Object to_ruby<std::vector<float>>(std::vector<float> const & x)
|
|
37
61
|
return a;
|
38
62
|
}
|
39
63
|
|
64
|
+
template<>
|
65
|
+
Rice::Object to_ruby<std::vector<double>>(std::vector<double> const & x)
|
66
|
+
{
|
67
|
+
auto a = Rice::Array();
|
68
|
+
for (size_t i = 0; i < x.size(); i++) {
|
69
|
+
a.push(x[i]);
|
70
|
+
}
|
71
|
+
return a;
|
72
|
+
}
|
73
|
+
|
40
74
|
template<typename T>
|
41
75
|
void bind_kll_sketch(Rice::Module& m, const char* name) {
|
42
|
-
Rice::define_class_under<
|
43
|
-
.define_constructor(Rice::Constructor<
|
44
|
-
.define_method("empty?", &
|
45
|
-
.define_method("
|
46
|
-
.define_method("
|
47
|
-
.define_method("
|
76
|
+
Rice::define_class_under<kll_sketch<T>>(m, name)
|
77
|
+
.define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), (Rice::Arg("k")=kll_sketch<T>::DEFAULT_K))
|
78
|
+
.define_method("empty?", &kll_sketch<T>::is_empty)
|
79
|
+
.define_method("n", &kll_sketch<T>::get_n)
|
80
|
+
.define_method("num_retained", &kll_sketch<T>::get_num_retained)
|
81
|
+
.define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
|
82
|
+
.define_method("min_value", &kll_sketch<T>::get_min_value)
|
83
|
+
.define_method("max_value", &kll_sketch<T>::get_max_value)
|
84
|
+
.define_method("quantile", &kll_sketch<T>::get_quantile)
|
48
85
|
.define_method(
|
49
86
|
"quantiles",
|
50
|
-
*[](
|
51
|
-
|
87
|
+
*[](kll_sketch<T>& self, Rice::Object obj) {
|
88
|
+
if (obj.is_a(rb_cArray)) {
|
89
|
+
auto fractions = from_ruby<std::vector<double>>(obj);
|
90
|
+
return self.get_quantiles(&fractions[0], fractions.size());
|
91
|
+
} else {
|
92
|
+
return self.get_quantiles(from_ruby<size_t>(obj));
|
93
|
+
}
|
94
|
+
})
|
95
|
+
.define_method(
|
96
|
+
"rank",
|
97
|
+
*[](kll_sketch<T>& self, const T item) {
|
98
|
+
return self.get_rank(item);
|
99
|
+
})
|
100
|
+
.define_method(
|
101
|
+
"pmf",
|
102
|
+
*[](kll_sketch<T>& self, std::vector<T> split_points) {
|
103
|
+
return self.get_PMF(&split_points[0], split_points.size());
|
104
|
+
})
|
105
|
+
.define_method(
|
106
|
+
"cdf",
|
107
|
+
*[](kll_sketch<T>& self, std::vector<T> split_points) {
|
108
|
+
return self.get_CDF(&split_points[0], split_points.size());
|
52
109
|
})
|
53
110
|
.define_method(
|
54
111
|
"merge",
|
55
|
-
*[](
|
112
|
+
*[](kll_sketch<T>& self, const kll_sketch<T>& other) {
|
56
113
|
self.merge(other);
|
57
114
|
})
|
58
115
|
.define_method(
|
59
116
|
"update",
|
60
|
-
*[](
|
117
|
+
*[](kll_sketch<T>& self, const T item) {
|
61
118
|
self.update(item);
|
62
119
|
})
|
63
120
|
.define_method(
|
64
121
|
"serialize",
|
65
|
-
*[](
|
122
|
+
*[](kll_sketch<T>& self) {
|
66
123
|
std::ostringstream oss;
|
67
124
|
self.serialize(oss);
|
68
125
|
return oss.str();
|
@@ -70,14 +127,14 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
|
|
70
127
|
// TODO change to summary?
|
71
128
|
.define_method(
|
72
129
|
"to_string",
|
73
|
-
*[](
|
130
|
+
*[](kll_sketch<T>& self) {
|
74
131
|
return self.to_string();
|
75
132
|
})
|
76
133
|
.define_singleton_method(
|
77
134
|
"deserialize",
|
78
135
|
*[](std::string& is) {
|
79
136
|
std::istringstream iss(is);
|
80
|
-
return
|
137
|
+
return kll_sketch<T>::deserialize(iss);
|
81
138
|
});
|
82
139
|
}
|
83
140
|
|
@@ -1,12 +1,102 @@
|
|
1
1
|
#include <sstream>
|
2
2
|
|
3
3
|
#include <theta_sketch.hpp>
|
4
|
+
#include <theta_union.hpp>
|
5
|
+
#include <theta_intersection.hpp>
|
6
|
+
#include <theta_a_not_b.hpp>
|
4
7
|
|
5
8
|
#include <rice/Constructor.hpp>
|
6
9
|
#include <rice/Module.hpp>
|
7
10
|
|
11
|
+
using datasketches::theta_sketch;
|
12
|
+
using datasketches::update_theta_sketch;
|
13
|
+
using datasketches::compact_theta_sketch;
|
14
|
+
using datasketches::theta_union;
|
15
|
+
using datasketches::theta_intersection;
|
16
|
+
using datasketches::theta_a_not_b;
|
17
|
+
|
18
|
+
using datasketches::DEFAULT_SEED;
|
19
|
+
|
20
|
+
using Rice::Arg;
|
21
|
+
|
8
22
|
void init_theta(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_method("empty?", &
|
11
|
-
.define_method("estimate", &
|
23
|
+
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
24
|
+
.define_method("empty?", &theta_sketch::is_empty)
|
25
|
+
.define_method("estimate", &theta_sketch::get_estimate)
|
26
|
+
.define_method("lower_bound", &theta_sketch::get_lower_bound)
|
27
|
+
.define_method("upper_bound", &theta_sketch::get_upper_bound)
|
28
|
+
.define_method(
|
29
|
+
"serialize",
|
30
|
+
*[](theta_sketch& self) {
|
31
|
+
std::ostringstream oss;
|
32
|
+
self.serialize(oss);
|
33
|
+
return oss.str();
|
34
|
+
});
|
35
|
+
|
36
|
+
Rice::define_class_under<update_theta_sketch, theta_sketch>(m, "UpdateThetaSketch")
|
37
|
+
.define_singleton_method(
|
38
|
+
"new",
|
39
|
+
*[](uint8_t lg_k, double p, uint64_t seed) {
|
40
|
+
update_theta_sketch::builder builder;
|
41
|
+
builder.set_lg_k(lg_k);
|
42
|
+
builder.set_p(p);
|
43
|
+
builder.set_seed(seed);
|
44
|
+
return builder.build();
|
45
|
+
},
|
46
|
+
(Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED))
|
47
|
+
.define_method("compact", &update_theta_sketch::compact, (Arg("ordered")=true))
|
48
|
+
.define_method(
|
49
|
+
"update",
|
50
|
+
*[](update_theta_sketch& self, Rice::Object datum) {
|
51
|
+
if (FIXNUM_P(datum.value())) {
|
52
|
+
return self.update(from_ruby<int64_t>(datum));
|
53
|
+
} else if (datum.is_a(rb_cNumeric)) {
|
54
|
+
return self.update(from_ruby<double>(datum));
|
55
|
+
} else {
|
56
|
+
return self.update(datum.to_s().str());
|
57
|
+
}
|
58
|
+
})
|
59
|
+
.define_method(
|
60
|
+
"estimate",
|
61
|
+
*[](update_theta_sketch& self) {
|
62
|
+
return self.get_estimate();
|
63
|
+
})
|
64
|
+
.define_singleton_method(
|
65
|
+
"deserialize",
|
66
|
+
*[](std::string& is) {
|
67
|
+
std::istringstream iss(is);
|
68
|
+
return update_theta_sketch::deserialize(iss);
|
69
|
+
});
|
70
|
+
|
71
|
+
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
72
|
+
.define_singleton_method(
|
73
|
+
"deserialize",
|
74
|
+
*[](std::string& is) {
|
75
|
+
std::istringstream iss(is);
|
76
|
+
return compact_theta_sketch::deserialize(iss);
|
77
|
+
});
|
78
|
+
|
79
|
+
Rice::define_class_under<theta_union>(m, "ThetaUnion")
|
80
|
+
.define_singleton_method(
|
81
|
+
"new",
|
82
|
+
*[](uint8_t lg_k, double p, uint64_t seed) {
|
83
|
+
theta_union::builder builder;
|
84
|
+
builder.set_lg_k(lg_k);
|
85
|
+
builder.set_p(p);
|
86
|
+
builder.set_seed(seed);
|
87
|
+
return builder.build();
|
88
|
+
},
|
89
|
+
(Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED))
|
90
|
+
.define_method("update", &theta_union::update)
|
91
|
+
.define_method("result", &theta_union::get_result, (Arg("ordered")=true));
|
92
|
+
|
93
|
+
Rice::define_class_under<theta_intersection>(m, "ThetaIntersection")
|
94
|
+
.define_constructor(Rice::Constructor<theta_intersection, uint64_t>(), (Arg("seed")=DEFAULT_SEED))
|
95
|
+
.define_method("update", &theta_intersection::update)
|
96
|
+
.define_method("result", &theta_intersection::get_result, (Arg("ordered")=true))
|
97
|
+
.define_method("result?", &theta_intersection::has_result);
|
98
|
+
|
99
|
+
Rice::define_class_under<theta_a_not_b>(m, "ThetaANotB")
|
100
|
+
.define_constructor(Rice::Constructor<theta_a_not_b, uint64_t>(), (Arg("seed")=DEFAULT_SEED))
|
101
|
+
.define_method("compute", &theta_a_not_b::compute, (Arg("a"), Arg("b"), Arg("ordered")=true));
|
12
102
|
}
|
@@ -6,18 +6,20 @@
|
|
6
6
|
#include <rice/Constructor.hpp>
|
7
7
|
#include <rice/Module.hpp>
|
8
8
|
|
9
|
+
using datasketches::var_opt_sketch;
|
10
|
+
|
9
11
|
template<typename T>
|
10
12
|
void bind_vo_sketch(Rice::Module &m, const char* name) {
|
11
|
-
Rice::define_class_under<
|
12
|
-
.define_constructor(Rice::Constructor<
|
13
|
-
.define_method("k", &
|
14
|
-
.define_method("n", &
|
15
|
-
.define_method("num_samples", &
|
16
|
-
.define_method("empty?", &
|
17
|
-
.define_method("reset", &
|
13
|
+
Rice::define_class_under<var_opt_sketch<T>>(m, "VarOptSketch")
|
14
|
+
.define_constructor(Rice::Constructor<var_opt_sketch<T>, uint32_t>())
|
15
|
+
.define_method("k", &var_opt_sketch<T>::get_k)
|
16
|
+
.define_method("n", &var_opt_sketch<T>::get_n)
|
17
|
+
.define_method("num_samples", &var_opt_sketch<T>::get_num_samples)
|
18
|
+
.define_method("empty?", &var_opt_sketch<T>::is_empty)
|
19
|
+
.define_method("reset", &var_opt_sketch<T>::reset)
|
18
20
|
.define_method(
|
19
21
|
"samples",
|
20
|
-
*[](
|
22
|
+
*[](var_opt_sketch<T>& self) {
|
21
23
|
auto a = Rice::Array();
|
22
24
|
for (auto item : self) {
|
23
25
|
auto t = Rice::Array();
|
@@ -29,7 +31,7 @@ void bind_vo_sketch(Rice::Module &m, const char* name) {
|
|
29
31
|
})
|
30
32
|
.define_method(
|
31
33
|
"update",
|
32
|
-
*[](
|
34
|
+
*[](var_opt_sketch<T>& self, const T item) {
|
33
35
|
self.update(item);
|
34
36
|
});
|
35
37
|
}
|
data/lib/datasketches/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|