datasketches 0.5.0 → 0.5.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a0f499815550712d69187bde082bfbbf0a2b076f8d4ae747cf9c85b59cdbd55
4
- data.tar.gz: 46cc4162f0a894a5dfc1c000ab74a4baf95404f515a69a19bede6aed28630279
3
+ metadata.gz: eadeb269a420be62fa48fbdc950bd5583cdfd1d74776b3e627be852f2551af89
4
+ data.tar.gz: 422aee84c37c94267c9e6222145eb0bad0f0530e6917faba42a8ef17f1079b69
5
5
  SHA512:
6
- metadata.gz: 63ac3ca31eeb6bd10bc4ededf78f15143211e38a34fcc627d9ecdc500097b862c6e4a73e0032b5338851041facf0359c9c142e4f80e83fd76cc0ed1c264201e8
7
- data.tar.gz: 9345d158a0e83d4b4a70703ddf47a686ab2a82c6dd31c5e55a1823be3a009c53ac807bc81e759812a682f23e56c2af43aab59baa1d2b2341eeac8dbdf619f26d
6
+ metadata.gz: de095d22fac5e124d9927178961aaf0c375e43020ffe23f448ae16f57b48725aea254076755c518588036ebfe66197ff31170e4ac08365b2d1608cc440a144af
7
+ data.tar.gz: 7f6dda2ab6190c1aa6527951406db4c57d58d2d8205bd5a13c61d50c2d0ed8c903a69d4dc2ccb805ba47d2b04c1cacc691dacef07f5c0228d83d5572e6fe805e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.5.2 (2026-04-06)
2
+
3
+ - Improved installation time
4
+ - Fixed unnecessary copying with `deserialize` method
5
+
6
+ ## 0.5.1 (2025-10-26)
7
+
8
+ - Fixed error with Rice 4.7
9
+
1
10
  ## 0.5.0 (2025-04-03)
2
11
 
3
12
  - Dropped support for Ruby < 3.2
data/ext/datasketches/cpc_wrapper.cpp CHANGED
@@ -1,9 +1,10 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
3
+ #include <string>
2
4
 
3
5
  #include <cpc_sketch.hpp>
4
6
  #include <cpc_union.hpp>
5
-
6
- #include "ext.h"
7
+ #include <rice/rice.hpp>
7
8
 
8
9
  using datasketches::cpc_sketch;
9
10
  using datasketches::cpc_union;
@@ -11,8 +12,6 @@ using datasketches::cpc_union;
11
12
  using datasketches::cpc_constants::DEFAULT_LG_K;
12
13
  using datasketches::DEFAULT_SEED;
13
14
 
14
- using Rice::Arg;
15
-
16
15
  void init_cpc(Rice::Module& m) {
17
16
  Rice::define_class_under<cpc_sketch>(m, "CpcSketch")
18
17
  .define_constructor(Rice::Constructor<cpc_sketch, uint8_t, uint64_t>(), Rice::Arg("lg_k")=DEFAULT_LG_K, Rice::Arg("seed")=DEFAULT_SEED)
@@ -24,11 +23,11 @@ void init_cpc(Rice::Module& m) {
24
23
  "update",
25
24
  [](cpc_sketch& self, Rice::Object datum) {
26
25
  if (FIXNUM_P(datum.value())) {
27
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
26
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
28
27
  } else if (datum.is_a(rb_cNumeric)) {
29
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
28
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
30
29
  } else {
31
- return self.update(datum.to_s().str());
30
+ self.update(datum.to_s().str());
32
31
  }
33
32
  })
34
33
  .define_method(
@@ -41,19 +40,18 @@ void init_cpc(Rice::Module& m) {
41
40
  [](cpc_sketch& self) {
42
41
  std::ostringstream oss;
43
42
  self.serialize(oss);
44
- return oss.str();
43
+ return Rice::String(oss.str());
45
44
  })
46
45
  // TODO change to summary?
47
46
  .define_method(
48
47
  "to_string",
49
48
  [](cpc_sketch& self) {
50
- return self.to_string();
49
+ return Rice::String(self.to_string());
51
50
  })
52
51
  .define_singleton_function(
53
52
  "deserialize",
54
- [](const std::string& is) {
55
- std::istringstream iss(is);
56
- return cpc_sketch::deserialize(iss);
53
+ [](Rice::String is) {
54
+ return cpc_sketch::deserialize(is.c_str(), is.length());
57
55
  });
58
56
 
59
57
  Rice::define_class_under<cpc_union>(m, "CpcUnion")
data/ext/datasketches/ext.cpp CHANGED
@@ -1,4 +1,4 @@
1
- #include "ext.h"
1
+ #include <rice/rice.hpp>
2
2
 
3
3
  void init_cpc(Rice::Module& m);
4
4
  void init_fi(Rice::Module& m);
@@ -8,8 +8,7 @@ void init_theta(Rice::Module& m);
8
8
  void init_vo(Rice::Module& m);
9
9
 
10
10
  extern "C"
11
- void Init_ext()
12
- {
11
+ void Init_ext() {
13
12
  Rice::Module m = Rice::define_module("DataSketches");
14
13
  init_cpc(m);
15
14
  init_fi(m);
data/ext/datasketches/fi_wrapper.cpp CHANGED
@@ -1,8 +1,10 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
3
+ #include <string>
2
4
 
3
5
  #include <frequent_items_sketch.hpp>
4
-
5
- #include "ext.h"
6
+ #include <rice/rice.hpp>
7
+ #include <rice/stl.hpp>
6
8
 
7
9
  template<typename T>
8
10
  void bind_fi_sketch(Rice::Module& m, const char* name) {
@@ -25,19 +27,18 @@ void bind_fi_sketch(Rice::Module& m, const char* name) {
25
27
  [](datasketches::frequent_items_sketch<T>& self) {
26
28
  std::ostringstream oss;
27
29
  self.serialize(oss);
28
- return oss.str();
30
+ return Rice::String(oss.str());
29
31
  })
30
32
  // TODO change to summary?
31
33
  .define_method(
32
34
  "to_string",
33
35
  [](datasketches::frequent_items_sketch<T>& self) {
34
- return self.to_string();
36
+ return Rice::String(self.to_string());
35
37
  })
36
38
  .define_singleton_function(
37
39
  "deserialize",
38
- [](const std::string& is) {
39
- std::istringstream iss(is);
40
- return datasketches::frequent_items_sketch<T>::deserialize(iss);
40
+ [](Rice::String is) {
41
+ return datasketches::frequent_items_sketch<T>::deserialize(is.c_str(), is.length());
41
42
  });
42
43
  }
43
44
 
data/ext/datasketches/hll_wrapper.cpp CHANGED
@@ -1,8 +1,9 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
3
+ #include <string>
2
4
 
3
5
  #include <hll.hpp>
4
-
5
- #include "ext.h"
6
+ #include <rice/rice.hpp>
6
7
 
7
8
  using datasketches::hll_sketch;
8
9
  using datasketches::hll_union;
@@ -18,11 +19,11 @@ void init_hll(Rice::Module& m) {
18
19
  "update",
19
20
  [](hll_sketch& self, Rice::Object datum) {
20
21
  if (FIXNUM_P(datum.value())) {
21
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
22
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
22
23
  } else if (datum.is_a(rb_cNumeric)) {
23
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
24
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
24
25
  } else {
25
- return self.update(datum.to_s().str());
26
+ self.update(datum.to_s().str());
26
27
  }
27
28
  })
28
29
  .define_method(
@@ -35,26 +36,25 @@ void init_hll(Rice::Module& m) {
35
36
  [](hll_sketch& self) {
36
37
  std::ostringstream oss;
37
38
  self.serialize_compact(oss);
38
- return oss.str();
39
+ return Rice::String(oss.str());
39
40
  })
40
41
  .define_method(
41
42
  "serialize_updatable",
42
43
  [](hll_sketch& self) {
43
44
  std::ostringstream oss;
44
45
  self.serialize_updatable(oss);
45
- return oss.str();
46
+ return Rice::String(oss.str());
46
47
  })
47
48
  // TODO change to summary?
48
49
  .define_method(
49
50
  "to_string",
50
51
  [](hll_sketch& self) {
51
- return self.to_string();
52
+ return Rice::String(self.to_string());
52
53
  })
53
54
  .define_singleton_function(
54
55
  "deserialize",
55
- [](const std::string& is) {
56
- std::istringstream iss(is);
57
- return hll_sketch::deserialize(iss);
56
+ [](Rice::String is) {
57
+ return hll_sketch::deserialize(is.c_str(), is.length());
58
58
  });
59
59
 
60
60
  Rice::define_class_under<hll_union>(m, "HllUnion")
@@ -1,27 +1,33 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
3
+ #include <string>
4
+ #include <vector>
2
5
 
3
6
  #include <kll_sketch.hpp>
4
-
5
- #include "ext.h"
7
+ #include <rice/rice.hpp>
6
8
 
7
9
  using datasketches::kll_sketch;
8
10
 
9
- namespace Rice::detail
10
- {
11
+ namespace Rice::detail {
11
12
  template<typename T>
12
- class To_Ruby<std::vector<T>>
13
- {
13
+ class To_Ruby<std::vector<T>> {
14
14
  public:
15
- VALUE convert(std::vector<T> const & x)
16
- {
17
- auto a = rb_ary_new2(x.size());
15
+ To_Ruby() = default;
16
+
17
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
18
+
19
+ VALUE convert(const std::vector<T>& x) {
20
+ auto a = detail::protect(rb_ary_new2, x.size());
18
21
  for (const auto& v : x) {
19
22
  detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
20
23
  }
21
24
  return a;
22
25
  }
26
+
27
+ private:
28
+ Arg* arg_ = nullptr;
23
29
  };
24
- }
30
+ } // namespace Rice::detail
25
31
 
26
32
  template<typename T>
27
33
  void bind_kll_sketch(Rice::Module& m, const char* name) {
@@ -45,13 +51,15 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
45
51
  }, Rice::Arg("item"), Rice::Arg("inclusive")=false)
46
52
  .define_method(
47
53
  "pmf",
48
- [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
49
- return self.get_PMF(&split_points[0], split_points.size(), inclusive);
54
+ [](kll_sketch<T>& self, Rice::Array rb_split_points, bool inclusive) {
55
+ std::vector<T> split_points = rb_split_points.to_vector<T>();
56
+ return self.get_PMF(split_points.data(), split_points.size(), inclusive);
50
57
  }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
51
58
  .define_method(
52
59
  "cdf",
53
- [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
54
- return self.get_CDF(&split_points[0], split_points.size(), inclusive);
60
+ [](kll_sketch<T>& self, Rice::Array rb_split_points, bool inclusive) {
61
+ std::vector<T> split_points = rb_split_points.to_vector<T>();
62
+ return self.get_CDF(split_points.data(), split_points.size(), inclusive);
55
63
  }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
56
64
  .define_method(
57
65
  "merge",
@@ -68,19 +76,18 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
68
76
  [](kll_sketch<T>& self) {
69
77
  std::ostringstream oss;
70
78
  self.serialize(oss);
71
- return oss.str();
79
+ return Rice::String(oss.str());
72
80
  })
73
81
  // TODO change to summary?
74
82
  .define_method(
75
83
  "to_string",
76
84
  [](kll_sketch<T>& self) {
77
- return self.to_string();
85
+ return Rice::String(self.to_string());
78
86
  })
79
87
  .define_singleton_function(
80
88
  "deserialize",
81
- [](const std::string& is) {
82
- std::istringstream iss(is);
83
- return kll_sketch<T>::deserialize(iss);
89
+ [](Rice::String is) {
90
+ return kll_sketch<T>::deserialize(is.c_str(), is.length());
84
91
  });
85
92
  }
86
93
 
@@ -1,11 +1,11 @@
1
- #include <sstream>
1
+ #include <cstdint>
2
+ #include <string>
2
3
 
4
+ #include <rice/rice.hpp>
5
+ #include <theta_a_not_b.hpp>
6
+ #include <theta_intersection.hpp>
3
7
  #include <theta_sketch.hpp>
4
8
  #include <theta_union.hpp>
5
- #include <theta_intersection.hpp>
6
- #include <theta_a_not_b.hpp>
7
-
8
- #include "ext.h"
9
9
 
10
10
  using datasketches::theta_sketch;
11
11
  using datasketches::update_theta_sketch;
@@ -16,8 +16,6 @@ using datasketches::theta_a_not_b;
16
16
 
17
17
  using datasketches::DEFAULT_SEED;
18
18
 
19
- using Rice::Arg;
20
-
21
19
  void init_theta(Rice::Module& m) {
22
20
  Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
23
21
  .define_method(
@@ -44,9 +42,8 @@ void init_theta(Rice::Module& m) {
44
42
  Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
45
43
  .define_singleton_function(
46
44
  "deserialize",
47
- [](const std::string& is) {
48
- std::istringstream iss(is);
49
- return compact_theta_sketch::deserialize(iss);
45
+ [](Rice::String is) {
46
+ return compact_theta_sketch::deserialize(is.c_str(), is.length());
50
47
  });
51
48
 
52
49
  Rice::define_class_under<update_theta_sketch, theta_sketch>(m, "UpdateThetaSketch")
@@ -59,17 +56,17 @@ void init_theta(Rice::Module& m) {
59
56
  builder.set_seed(seed);
60
57
  return builder.build();
61
58
  },
62
- Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
63
- .define_method("compact", &update_theta_sketch::compact, Arg("ordered")=true)
59
+ Rice::Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Rice::Arg("p")=1.0, Rice::Arg("seed")=DEFAULT_SEED)
60
+ .define_method("compact", &update_theta_sketch::compact, Rice::Arg("ordered")=true)
64
61
  .define_method(
65
62
  "update",
66
63
  [](update_theta_sketch& self, Rice::Object datum) {
67
64
  if (FIXNUM_P(datum.value())) {
68
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
65
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
69
66
  } else if (datum.is_a(rb_cNumeric)) {
70
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
67
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
71
68
  } else {
72
- return self.update(datum.to_s().str());
69
+ self.update(datum.to_s().str());
73
70
  }
74
71
  })
75
72
  .define_method(
@@ -81,24 +78,24 @@ void init_theta(Rice::Module& m) {
81
78
  Rice::define_class_under<theta_union>(m, "ThetaUnion")
82
79
  .define_singleton_function(
83
80
  "new",
84
- [](uint8_t lg_k, double p, uint64_t seed) {
81
+ [](uint8_t lg_k, float p, uint64_t seed) {
85
82
  theta_union::builder builder;
86
83
  builder.set_lg_k(lg_k);
87
84
  builder.set_p(p);
88
85
  builder.set_seed(seed);
89
86
  return builder.build();
90
87
  },
91
- Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
88
+ Rice::Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Rice::Arg("p")=1.0f, Rice::Arg("seed")=DEFAULT_SEED)
92
89
  .define_method("update", &theta_union::update<const theta_sketch&>)
93
- .define_method("result", &theta_union::get_result, Arg("ordered")=true);
90
+ .define_method("result", &theta_union::get_result, Rice::Arg("ordered")=true);
94
91
 
95
92
  Rice::define_class_under<theta_intersection>(m, "ThetaIntersection")
96
- .define_constructor(Rice::Constructor<theta_intersection, uint64_t>(), Arg("seed")=DEFAULT_SEED)
93
+ .define_constructor(Rice::Constructor<theta_intersection, uint64_t>(), Rice::Arg("seed")=DEFAULT_SEED)
97
94
  .define_method("update", &theta_intersection::update<const theta_sketch&>)
98
- .define_method("result", &theta_intersection::get_result, Arg("ordered")=true)
95
+ .define_method("result", &theta_intersection::get_result, Rice::Arg("ordered")=true)
99
96
  .define_method("result?", &theta_intersection::has_result);
100
97
 
101
98
  Rice::define_class_under<theta_a_not_b>(m, "ThetaANotB")
102
- .define_constructor(Rice::Constructor<theta_a_not_b, uint64_t>(), Arg("seed")=DEFAULT_SEED)
103
- .define_method("compute", &theta_a_not_b::compute<const theta_sketch&, const theta_sketch&>, Arg("a"), Arg("b"), Arg("ordered")=true);
99
+ .define_constructor(Rice::Constructor<theta_a_not_b, uint64_t>(), Rice::Arg("seed")=DEFAULT_SEED)
100
+ .define_method("compute", &theta_a_not_b::compute<const theta_sketch&, const theta_sketch&>, Rice::Arg("a"), Rice::Arg("b"), Rice::Arg("ordered")=true);
104
101
  }
@@ -1,9 +1,8 @@
1
- #include <sstream>
1
+ #include <cstdint>
2
2
 
3
+ #include <rice/rice.hpp>
3
4
  #include <var_opt_sketch.hpp>
4
5
 
5
- #include "ext.h"
6
-
7
6
  using datasketches::var_opt_sketch;
8
7
 
9
8
  template<typename T>
@@ -21,9 +20,9 @@ void bind_vo_sketch(Rice::Module &m, const char* name) {
21
20
  auto a = Rice::Array();
22
21
  for (auto item : self) {
23
22
  auto t = Rice::Array();
24
- t.push(item.first);
25
- t.push(item.second);
26
- a.push(t);
23
+ t.push(item.first, false);
24
+ t.push(item.second, false);
25
+ a.push(t, false);
27
26
  }
28
27
  return a;
29
28
  })
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-04-03 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: rice
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: 4.3.3
18
+ version: '4.7'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: 4.3.3
25
+ version: '4.7'
26
26
  email: andrew@ankane.org
27
27
  executables: []
28
28
  extensions:
@@ -35,7 +35,6 @@ files:
35
35
  - README.md
36
36
  - ext/datasketches/cpc_wrapper.cpp
37
37
  - ext/datasketches/ext.cpp
38
- - ext/datasketches/ext.h
39
38
  - ext/datasketches/extconf.rb
40
39
  - ext/datasketches/fi_wrapper.cpp
41
40
  - ext/datasketches/hll_wrapper.cpp
@@ -343,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
343
342
  - !ruby/object:Gem::Version
344
343
  version: '0'
345
344
  requirements: []
346
- rubygems_version: 3.6.2
345
+ rubygems_version: 4.0.6
347
346
  specification_version: 4
348
347
  summary: Sketch data structures for Ruby
349
348
  test_files: []
@@ -1,4 +0,0 @@
1
- #pragma once
2
-
3
- #include <rice/rice.hpp>
4
- #include <rice/stl.hpp>