datasketches 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +80 -4
- data/ext/datasketches/cpc_wrapper.cpp +29 -11
- data/ext/datasketches/hll_wrapper.cpp +25 -16
- data/ext/datasketches/kll_wrapper.cpp +71 -14
- data/ext/datasketches/theta_wrapper.cpp +93 -3
- data/ext/datasketches/vo_wrapper.cpp +11 -9
- data/lib/datasketches/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d74bd4504918fd8217f9cf7d8c24442b5c196b2d874a042f6a2f30a24ae51db
|
4
|
+
data.tar.gz: 7521194ca0d05808b234c6cb1bb5150874c28f7e9536820f728b33b0aec8c52a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db41c21f18a51bf285e32556c52e06453810448d1d75d11cd1022576a98d5069977d65fe6bab7e5bd6425468531414fdd801a1bde1bfdf82fc05f455d97b80fd
|
7
|
+
data.tar.gz: 892e9f2c4b891194cfc1af8387494e78eb2e703b4a56f7c91e1b4784f0bd2439f35f8e25f92e954be44418d2964e804f892ab687df2acf8e9b64d43e0049cca0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,6 +18,7 @@ Distinct counting
|
|
18
18
|
|
19
19
|
- [CPC sketch](#cpc-sketch)
|
20
20
|
- [HyperLogLog sketch](#hyperloglog-sketch)
|
21
|
+
- [Theta sketch](#theta-sketch)
|
21
22
|
|
22
23
|
Most frequent
|
23
24
|
|
@@ -36,7 +37,7 @@ Sampling
|
|
36
37
|
Create a sketch
|
37
38
|
|
38
39
|
```ruby
|
39
|
-
sketch = DataSketches::CpcSketch.new
|
40
|
+
sketch = DataSketches::CpcSketch.new
|
40
41
|
```
|
41
42
|
|
42
43
|
Add data
|
@@ -65,6 +66,15 @@ Load a sketch
|
|
65
66
|
sketch = DataSketches::CpcSketch.deserialize(data)
|
66
67
|
```
|
67
68
|
|
69
|
+
Get the union
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
u = DataSketches::CpcUnion.new(14)
|
73
|
+
u.update(sketch1)
|
74
|
+
u.update(sketch2)
|
75
|
+
u.result
|
76
|
+
```
|
77
|
+
|
68
78
|
## HyperLogLog Sketch
|
69
79
|
|
70
80
|
Create a sketch
|
@@ -107,7 +117,66 @@ Get the union
|
|
107
117
|
u = DataSketches::HllUnion.new(14)
|
108
118
|
u.update(sketch1)
|
109
119
|
u.update(sketch2)
|
110
|
-
u.
|
120
|
+
u.result
|
121
|
+
```
|
122
|
+
|
123
|
+
## Theta Sketch
|
124
|
+
|
125
|
+
Create a sketch
|
126
|
+
|
127
|
+
```ruby
|
128
|
+
sketch = DataSketches::UpdateThetaSketch.new
|
129
|
+
```
|
130
|
+
|
131
|
+
Add data
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
sketch.update(1)
|
135
|
+
sketch.update(2.0)
|
136
|
+
sketch.update("three")
|
137
|
+
```
|
138
|
+
|
139
|
+
Estimate the count
|
140
|
+
|
141
|
+
```ruby
|
142
|
+
sketch.estimate
|
143
|
+
```
|
144
|
+
|
145
|
+
Save a sketch
|
146
|
+
|
147
|
+
```ruby
|
148
|
+
data = sketch.serialize
|
149
|
+
```
|
150
|
+
|
151
|
+
Load a sketch
|
152
|
+
|
153
|
+
```ruby
|
154
|
+
sketch = DataSketches::UpdateThetaSketch.deserialize(data)
|
155
|
+
```
|
156
|
+
|
157
|
+
Get the union
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
u = DataSketches::ThetaUnion.new
|
161
|
+
u.update(sketch1)
|
162
|
+
u.update(sketch2)
|
163
|
+
u.result
|
164
|
+
```
|
165
|
+
|
166
|
+
Get the intersection
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
i = DataSketches::ThetaIntersection.new
|
170
|
+
i.update(sketch1)
|
171
|
+
i.update(sketch2)
|
172
|
+
i.result
|
173
|
+
```
|
174
|
+
|
175
|
+
Compute A not B
|
176
|
+
|
177
|
+
```ruby
|
178
|
+
d = DataSketches::ThetaANotB.new
|
179
|
+
d.compute(a, b)
|
111
180
|
```
|
112
181
|
|
113
182
|
## Frequent Item Sketch
|
@@ -149,9 +218,9 @@ sketch = DataSketches::FrequentStringsSketch.deserialize(data)
|
|
149
218
|
Create a sketch
|
150
219
|
|
151
220
|
```ruby
|
152
|
-
sketch = DataSketches::KllIntsSketch.new
|
221
|
+
sketch = DataSketches::KllIntsSketch.new
|
153
222
|
# or
|
154
|
-
sketch = DataSketches::KllFloatsSketch.new
|
223
|
+
sketch = DataSketches::KllFloatsSketch.new
|
155
224
|
```
|
156
225
|
|
157
226
|
Add data
|
@@ -167,6 +236,7 @@ Get quantiles
|
|
167
236
|
```ruby
|
168
237
|
sketch.quantile(0.5)
|
169
238
|
sketch.quantiles([0.25, 0.5, 0.75])
|
239
|
+
sketch.quantiles(3)
|
170
240
|
```
|
171
241
|
|
172
242
|
Get the minimum and maximum values from the stream
|
@@ -210,6 +280,12 @@ sketch.update(2.0)
|
|
210
280
|
sketch.update("three")
|
211
281
|
```
|
212
282
|
|
283
|
+
Sample data
|
284
|
+
|
285
|
+
```ruby
|
286
|
+
sketch.samples
|
287
|
+
```
|
288
|
+
|
213
289
|
## Credits
|
214
290
|
|
215
291
|
This library is modeled after the DataSketches [Python API](https://github.com/apache/datasketches-cpp/tree/master/python).
|
@@ -1,20 +1,29 @@
|
|
1
1
|
#include <sstream>
|
2
2
|
|
3
3
|
#include <cpc_sketch.hpp>
|
4
|
+
#include <cpc_union.hpp>
|
4
5
|
|
5
6
|
#include <rice/Constructor.hpp>
|
6
7
|
#include <rice/Module.hpp>
|
7
8
|
|
9
|
+
using datasketches::cpc_sketch;
|
10
|
+
using datasketches::cpc_union;
|
11
|
+
|
12
|
+
using datasketches::CPC_DEFAULT_LG_K;
|
13
|
+
using datasketches::DEFAULT_SEED;
|
14
|
+
|
15
|
+
using Rice::Arg;
|
16
|
+
|
8
17
|
void init_cpc(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_constructor(Rice::Constructor<
|
11
|
-
.define_method("lg_k", &
|
12
|
-
.define_method("empty?", &
|
13
|
-
.define_method("lower_bound", &
|
14
|
-
.define_method("upper_bound", &
|
18
|
+
Rice::define_class_under<cpc_sketch>(m, "CpcSketch")
|
19
|
+
.define_constructor(Rice::Constructor<cpc_sketch, uint8_t, uint64_t>(), (Rice::Arg("lg_k")=CPC_DEFAULT_LG_K, Rice::Arg("seed")=DEFAULT_SEED))
|
20
|
+
.define_method("lg_k", &cpc_sketch::get_lg_k)
|
21
|
+
.define_method("empty?", &cpc_sketch::is_empty)
|
22
|
+
.define_method("lower_bound", &cpc_sketch::get_lower_bound)
|
23
|
+
.define_method("upper_bound", &cpc_sketch::get_upper_bound)
|
15
24
|
.define_method(
|
16
25
|
"update",
|
17
|
-
*[](
|
26
|
+
*[](cpc_sketch& self, Rice::Object datum) {
|
18
27
|
if (FIXNUM_P(datum.value())) {
|
19
28
|
return self.update(from_ruby<int64_t>(datum));
|
20
29
|
} else if (datum.is_a(rb_cNumeric)) {
|
@@ -25,12 +34,12 @@ void init_cpc(Rice::Module& m) {
|
|
25
34
|
})
|
26
35
|
.define_method(
|
27
36
|
"estimate",
|
28
|
-
*[](
|
37
|
+
*[](cpc_sketch& self) {
|
29
38
|
return self.get_estimate();
|
30
39
|
})
|
31
40
|
.define_method(
|
32
41
|
"serialize",
|
33
|
-
*[](
|
42
|
+
*[](cpc_sketch& self) {
|
34
43
|
std::ostringstream oss;
|
35
44
|
self.serialize(oss);
|
36
45
|
return oss.str();
|
@@ -38,13 +47,22 @@ void init_cpc(Rice::Module& m) {
|
|
38
47
|
// TODO change to summary?
|
39
48
|
.define_method(
|
40
49
|
"to_string",
|
41
|
-
*[](
|
50
|
+
*[](cpc_sketch& self) {
|
42
51
|
return self.to_string();
|
43
52
|
})
|
44
53
|
.define_singleton_method(
|
45
54
|
"deserialize",
|
46
55
|
*[](std::string& is) {
|
47
56
|
std::istringstream iss(is);
|
48
|
-
return
|
57
|
+
return cpc_sketch::deserialize(iss);
|
58
|
+
});
|
59
|
+
|
60
|
+
Rice::define_class_under<cpc_union>(m, "CpcUnion")
|
61
|
+
.define_constructor(Rice::Constructor<cpc_union, uint8_t, uint64_t>(), (Rice::Arg("lg_k"), Rice::Arg("seed")=DEFAULT_SEED))
|
62
|
+
.define_method("result", &cpc_union::get_result)
|
63
|
+
.define_method(
|
64
|
+
"update",
|
65
|
+
*[](cpc_union& self, cpc_sketch& sketch) {
|
66
|
+
self.update(sketch);
|
49
67
|
});
|
50
68
|
}
|
@@ -5,15 +5,19 @@
|
|
5
5
|
#include <rice/Constructor.hpp>
|
6
6
|
#include <rice/Module.hpp>
|
7
7
|
|
8
|
+
using datasketches::hll_sketch;
|
9
|
+
using datasketches::hll_union;
|
10
|
+
|
8
11
|
void init_hll(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_constructor(Rice::Constructor<
|
11
|
-
.define_method("lg_config_k", &
|
12
|
-
.define_method("compact?", &
|
13
|
-
.define_method("empty?", &
|
12
|
+
Rice::define_class_under<hll_sketch>(m, "HllSketch")
|
13
|
+
.define_constructor(Rice::Constructor<hll_sketch, int>())
|
14
|
+
.define_method("lg_config_k", &hll_sketch::get_lg_config_k)
|
15
|
+
.define_method("compact?", &hll_sketch::is_compact)
|
16
|
+
.define_method("empty?", &hll_sketch::is_empty)
|
17
|
+
.define_method("composite_estimate", &hll_sketch::get_composite_estimate)
|
14
18
|
.define_method(
|
15
19
|
"update",
|
16
|
-
*[](
|
20
|
+
*[](hll_sketch& self, Rice::Object datum) {
|
17
21
|
if (FIXNUM_P(datum.value())) {
|
18
22
|
return self.update(from_ruby<int64_t>(datum));
|
19
23
|
} else if (datum.is_a(rb_cNumeric)) {
|
@@ -24,19 +28,19 @@ void init_hll(Rice::Module& m) {
|
|
24
28
|
})
|
25
29
|
.define_method(
|
26
30
|
"estimate",
|
27
|
-
*[](
|
31
|
+
*[](hll_sketch& self) {
|
28
32
|
return self.get_estimate();
|
29
33
|
})
|
30
34
|
.define_method(
|
31
35
|
"serialize_compact",
|
32
|
-
*[](
|
36
|
+
*[](hll_sketch& self) {
|
33
37
|
std::ostringstream oss;
|
34
38
|
self.serialize_compact(oss);
|
35
39
|
return oss.str();
|
36
40
|
})
|
37
41
|
.define_method(
|
38
42
|
"serialize_updatable",
|
39
|
-
*[](
|
43
|
+
*[](hll_sketch& self) {
|
40
44
|
std::ostringstream oss;
|
41
45
|
self.serialize_updatable(oss);
|
42
46
|
return oss.str();
|
@@ -44,26 +48,31 @@ void init_hll(Rice::Module& m) {
|
|
44
48
|
// TODO change to summary?
|
45
49
|
.define_method(
|
46
50
|
"to_string",
|
47
|
-
*[](
|
51
|
+
*[](hll_sketch& self) {
|
48
52
|
return self.to_string();
|
49
53
|
})
|
50
54
|
.define_singleton_method(
|
51
55
|
"deserialize",
|
52
56
|
*[](std::string& is) {
|
53
57
|
std::istringstream iss(is);
|
54
|
-
return
|
58
|
+
return hll_sketch::deserialize(iss);
|
55
59
|
});
|
56
60
|
|
57
|
-
Rice::define_class_under<
|
58
|
-
.define_constructor(Rice::Constructor<
|
61
|
+
Rice::define_class_under<hll_union>(m, "HllUnion")
|
62
|
+
.define_constructor(Rice::Constructor<hll_union, int>())
|
59
63
|
.define_method(
|
60
64
|
"update",
|
61
|
-
*[](
|
62
|
-
self.update(
|
65
|
+
*[](hll_union& self, hll_sketch& sketch) {
|
66
|
+
self.update(sketch);
|
63
67
|
})
|
64
68
|
.define_method(
|
65
69
|
"estimate",
|
66
|
-
*[](
|
70
|
+
*[](hll_union& self) {
|
67
71
|
return self.get_estimate();
|
72
|
+
})
|
73
|
+
.define_method(
|
74
|
+
"result",
|
75
|
+
*[](hll_union& self) {
|
76
|
+
return self.get_result();
|
68
77
|
});
|
69
78
|
}
|
@@ -6,12 +6,36 @@
|
|
6
6
|
#include <rice/Constructor.hpp>
|
7
7
|
#include <rice/Module.hpp>
|
8
8
|
|
9
|
+
using datasketches::kll_sketch;
|
10
|
+
|
11
|
+
template<>
|
12
|
+
std::vector<int> from_ruby<std::vector<int>>(Rice::Object x)
|
13
|
+
{
|
14
|
+
auto a = Rice::Array(x);
|
15
|
+
std::vector<int> vec(a.size());
|
16
|
+
for (long i = 0; i < a.size(); i++) {
|
17
|
+
vec[i] = from_ruby<int>(a[i]);
|
18
|
+
}
|
19
|
+
return vec;
|
20
|
+
}
|
21
|
+
|
22
|
+
template<>
|
23
|
+
std::vector<float> from_ruby<std::vector<float>>(Rice::Object x)
|
24
|
+
{
|
25
|
+
auto a = Rice::Array(x);
|
26
|
+
std::vector<float> vec(a.size());
|
27
|
+
for (long i = 0; i < a.size(); i++) {
|
28
|
+
vec[i] = from_ruby<float>(a[i]);
|
29
|
+
}
|
30
|
+
return vec;
|
31
|
+
}
|
32
|
+
|
9
33
|
template<>
|
10
34
|
std::vector<double> from_ruby<std::vector<double>>(Rice::Object x)
|
11
35
|
{
|
12
36
|
auto a = Rice::Array(x);
|
13
37
|
std::vector<double> vec(a.size());
|
14
|
-
for (
|
38
|
+
for (long i = 0; i < a.size(); i++) {
|
15
39
|
vec[i] = from_ruby<double>(a[i]);
|
16
40
|
}
|
17
41
|
return vec;
|
@@ -37,32 +61,65 @@ Rice::Object to_ruby<std::vector<float>>(std::vector<float> const & x)
|
|
37
61
|
return a;
|
38
62
|
}
|
39
63
|
|
64
|
+
template<>
|
65
|
+
Rice::Object to_ruby<std::vector<double>>(std::vector<double> const & x)
|
66
|
+
{
|
67
|
+
auto a = Rice::Array();
|
68
|
+
for (size_t i = 0; i < x.size(); i++) {
|
69
|
+
a.push(x[i]);
|
70
|
+
}
|
71
|
+
return a;
|
72
|
+
}
|
73
|
+
|
40
74
|
template<typename T>
|
41
75
|
void bind_kll_sketch(Rice::Module& m, const char* name) {
|
42
|
-
Rice::define_class_under<
|
43
|
-
.define_constructor(Rice::Constructor<
|
44
|
-
.define_method("empty?", &
|
45
|
-
.define_method("
|
46
|
-
.define_method("
|
47
|
-
.define_method("
|
76
|
+
Rice::define_class_under<kll_sketch<T>>(m, name)
|
77
|
+
.define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), (Rice::Arg("k")=kll_sketch<T>::DEFAULT_K))
|
78
|
+
.define_method("empty?", &kll_sketch<T>::is_empty)
|
79
|
+
.define_method("n", &kll_sketch<T>::get_n)
|
80
|
+
.define_method("num_retained", &kll_sketch<T>::get_num_retained)
|
81
|
+
.define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
|
82
|
+
.define_method("min_value", &kll_sketch<T>::get_min_value)
|
83
|
+
.define_method("max_value", &kll_sketch<T>::get_max_value)
|
84
|
+
.define_method("quantile", &kll_sketch<T>::get_quantile)
|
48
85
|
.define_method(
|
49
86
|
"quantiles",
|
50
|
-
*[](
|
51
|
-
|
87
|
+
*[](kll_sketch<T>& self, Rice::Object obj) {
|
88
|
+
if (obj.is_a(rb_cArray)) {
|
89
|
+
auto fractions = from_ruby<std::vector<double>>(obj);
|
90
|
+
return self.get_quantiles(&fractions[0], fractions.size());
|
91
|
+
} else {
|
92
|
+
return self.get_quantiles(from_ruby<size_t>(obj));
|
93
|
+
}
|
94
|
+
})
|
95
|
+
.define_method(
|
96
|
+
"rank",
|
97
|
+
*[](kll_sketch<T>& self, const T item) {
|
98
|
+
return self.get_rank(item);
|
99
|
+
})
|
100
|
+
.define_method(
|
101
|
+
"pmf",
|
102
|
+
*[](kll_sketch<T>& self, std::vector<T> split_points) {
|
103
|
+
return self.get_PMF(&split_points[0], split_points.size());
|
104
|
+
})
|
105
|
+
.define_method(
|
106
|
+
"cdf",
|
107
|
+
*[](kll_sketch<T>& self, std::vector<T> split_points) {
|
108
|
+
return self.get_CDF(&split_points[0], split_points.size());
|
52
109
|
})
|
53
110
|
.define_method(
|
54
111
|
"merge",
|
55
|
-
*[](
|
112
|
+
*[](kll_sketch<T>& self, const kll_sketch<T>& other) {
|
56
113
|
self.merge(other);
|
57
114
|
})
|
58
115
|
.define_method(
|
59
116
|
"update",
|
60
|
-
*[](
|
117
|
+
*[](kll_sketch<T>& self, const T item) {
|
61
118
|
self.update(item);
|
62
119
|
})
|
63
120
|
.define_method(
|
64
121
|
"serialize",
|
65
|
-
*[](
|
122
|
+
*[](kll_sketch<T>& self) {
|
66
123
|
std::ostringstream oss;
|
67
124
|
self.serialize(oss);
|
68
125
|
return oss.str();
|
@@ -70,14 +127,14 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
|
|
70
127
|
// TODO change to summary?
|
71
128
|
.define_method(
|
72
129
|
"to_string",
|
73
|
-
*[](
|
130
|
+
*[](kll_sketch<T>& self) {
|
74
131
|
return self.to_string();
|
75
132
|
})
|
76
133
|
.define_singleton_method(
|
77
134
|
"deserialize",
|
78
135
|
*[](std::string& is) {
|
79
136
|
std::istringstream iss(is);
|
80
|
-
return
|
137
|
+
return kll_sketch<T>::deserialize(iss);
|
81
138
|
});
|
82
139
|
}
|
83
140
|
|
@@ -1,12 +1,102 @@
|
|
1
1
|
#include <sstream>
|
2
2
|
|
3
3
|
#include <theta_sketch.hpp>
|
4
|
+
#include <theta_union.hpp>
|
5
|
+
#include <theta_intersection.hpp>
|
6
|
+
#include <theta_a_not_b.hpp>
|
4
7
|
|
5
8
|
#include <rice/Constructor.hpp>
|
6
9
|
#include <rice/Module.hpp>
|
7
10
|
|
11
|
+
using datasketches::theta_sketch;
|
12
|
+
using datasketches::update_theta_sketch;
|
13
|
+
using datasketches::compact_theta_sketch;
|
14
|
+
using datasketches::theta_union;
|
15
|
+
using datasketches::theta_intersection;
|
16
|
+
using datasketches::theta_a_not_b;
|
17
|
+
|
18
|
+
using datasketches::DEFAULT_SEED;
|
19
|
+
|
20
|
+
using Rice::Arg;
|
21
|
+
|
8
22
|
void init_theta(Rice::Module& m) {
|
9
|
-
Rice::define_class_under<
|
10
|
-
.define_method("empty?", &
|
11
|
-
.define_method("estimate", &
|
23
|
+
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
24
|
+
.define_method("empty?", &theta_sketch::is_empty)
|
25
|
+
.define_method("estimate", &theta_sketch::get_estimate)
|
26
|
+
.define_method("lower_bound", &theta_sketch::get_lower_bound)
|
27
|
+
.define_method("upper_bound", &theta_sketch::get_upper_bound)
|
28
|
+
.define_method(
|
29
|
+
"serialize",
|
30
|
+
*[](theta_sketch& self) {
|
31
|
+
std::ostringstream oss;
|
32
|
+
self.serialize(oss);
|
33
|
+
return oss.str();
|
34
|
+
});
|
35
|
+
|
36
|
+
Rice::define_class_under<update_theta_sketch, theta_sketch>(m, "UpdateThetaSketch")
|
37
|
+
.define_singleton_method(
|
38
|
+
"new",
|
39
|
+
*[](uint8_t lg_k, double p, uint64_t seed) {
|
40
|
+
update_theta_sketch::builder builder;
|
41
|
+
builder.set_lg_k(lg_k);
|
42
|
+
builder.set_p(p);
|
43
|
+
builder.set_seed(seed);
|
44
|
+
return builder.build();
|
45
|
+
},
|
46
|
+
(Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED))
|
47
|
+
.define_method("compact", &update_theta_sketch::compact, (Arg("ordered")=true))
|
48
|
+
.define_method(
|
49
|
+
"update",
|
50
|
+
*[](update_theta_sketch& self, Rice::Object datum) {
|
51
|
+
if (FIXNUM_P(datum.value())) {
|
52
|
+
return self.update(from_ruby<int64_t>(datum));
|
53
|
+
} else if (datum.is_a(rb_cNumeric)) {
|
54
|
+
return self.update(from_ruby<double>(datum));
|
55
|
+
} else {
|
56
|
+
return self.update(datum.to_s().str());
|
57
|
+
}
|
58
|
+
})
|
59
|
+
.define_method(
|
60
|
+
"estimate",
|
61
|
+
*[](update_theta_sketch& self) {
|
62
|
+
return self.get_estimate();
|
63
|
+
})
|
64
|
+
.define_singleton_method(
|
65
|
+
"deserialize",
|
66
|
+
*[](std::string& is) {
|
67
|
+
std::istringstream iss(is);
|
68
|
+
return update_theta_sketch::deserialize(iss);
|
69
|
+
});
|
70
|
+
|
71
|
+
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
72
|
+
.define_singleton_method(
|
73
|
+
"deserialize",
|
74
|
+
*[](std::string& is) {
|
75
|
+
std::istringstream iss(is);
|
76
|
+
return compact_theta_sketch::deserialize(iss);
|
77
|
+
});
|
78
|
+
|
79
|
+
Rice::define_class_under<theta_union>(m, "ThetaUnion")
|
80
|
+
.define_singleton_method(
|
81
|
+
"new",
|
82
|
+
*[](uint8_t lg_k, double p, uint64_t seed) {
|
83
|
+
theta_union::builder builder;
|
84
|
+
builder.set_lg_k(lg_k);
|
85
|
+
builder.set_p(p);
|
86
|
+
builder.set_seed(seed);
|
87
|
+
return builder.build();
|
88
|
+
},
|
89
|
+
(Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED))
|
90
|
+
.define_method("update", &theta_union::update)
|
91
|
+
.define_method("result", &theta_union::get_result, (Arg("ordered")=true));
|
92
|
+
|
93
|
+
Rice::define_class_under<theta_intersection>(m, "ThetaIntersection")
|
94
|
+
.define_constructor(Rice::Constructor<theta_intersection, uint64_t>(), (Arg("seed")=DEFAULT_SEED))
|
95
|
+
.define_method("update", &theta_intersection::update)
|
96
|
+
.define_method("result", &theta_intersection::get_result, (Arg("ordered")=true))
|
97
|
+
.define_method("result?", &theta_intersection::has_result);
|
98
|
+
|
99
|
+
Rice::define_class_under<theta_a_not_b>(m, "ThetaANotB")
|
100
|
+
.define_constructor(Rice::Constructor<theta_a_not_b, uint64_t>(), (Arg("seed")=DEFAULT_SEED))
|
101
|
+
.define_method("compute", &theta_a_not_b::compute, (Arg("a"), Arg("b"), Arg("ordered")=true));
|
12
102
|
}
|
@@ -6,18 +6,20 @@
|
|
6
6
|
#include <rice/Constructor.hpp>
|
7
7
|
#include <rice/Module.hpp>
|
8
8
|
|
9
|
+
using datasketches::var_opt_sketch;
|
10
|
+
|
9
11
|
template<typename T>
|
10
12
|
void bind_vo_sketch(Rice::Module &m, const char* name) {
|
11
|
-
Rice::define_class_under<
|
12
|
-
.define_constructor(Rice::Constructor<
|
13
|
-
.define_method("k", &
|
14
|
-
.define_method("n", &
|
15
|
-
.define_method("num_samples", &
|
16
|
-
.define_method("empty?", &
|
17
|
-
.define_method("reset", &
|
13
|
+
Rice::define_class_under<var_opt_sketch<T>>(m, "VarOptSketch")
|
14
|
+
.define_constructor(Rice::Constructor<var_opt_sketch<T>, uint32_t>())
|
15
|
+
.define_method("k", &var_opt_sketch<T>::get_k)
|
16
|
+
.define_method("n", &var_opt_sketch<T>::get_n)
|
17
|
+
.define_method("num_samples", &var_opt_sketch<T>::get_num_samples)
|
18
|
+
.define_method("empty?", &var_opt_sketch<T>::is_empty)
|
19
|
+
.define_method("reset", &var_opt_sketch<T>::reset)
|
18
20
|
.define_method(
|
19
21
|
"samples",
|
20
|
-
*[](
|
22
|
+
*[](var_opt_sketch<T>& self) {
|
21
23
|
auto a = Rice::Array();
|
22
24
|
for (auto item : self) {
|
23
25
|
auto t = Rice::Array();
|
@@ -29,7 +31,7 @@ void bind_vo_sketch(Rice::Module &m, const char* name) {
|
|
29
31
|
})
|
30
32
|
.define_method(
|
31
33
|
"update",
|
32
|
-
*[](
|
34
|
+
*[](var_opt_sketch<T>& self, const T item) {
|
33
35
|
self.update(item);
|
34
36
|
});
|
35
37
|
}
|
data/lib/datasketches/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-01-
|
11
|
+
date: 2021-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|