datasketches 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a130c4ac282887b6cd406512ab071baa7c26f6c375ed3a2c0d48dac400b7572e
4
- data.tar.gz: c08742aa7121718f1459096fecc5c7a62f87ba540264082c00021dfe30eadfe6
3
+ metadata.gz: eadeb269a420be62fa48fbdc950bd5583cdfd1d74776b3e627be852f2551af89
4
+ data.tar.gz: 422aee84c37c94267c9e6222145eb0bad0f0530e6917faba42a8ef17f1079b69
5
5
  SHA512:
6
- metadata.gz: e1453845a086ed9b2069e53a1a4dd47af5f3529507e41886fdd7b6728baae52164a645dc9a6f4895b74b392ef086d7873ccf36b23f49728cb73bf73428384d79
7
- data.tar.gz: 423d9134d4dc134daa0c93dfe79c455e8abf5b5d95b67b8d05bf0e105a1746b60ddbfb06ff461137f6645553074db9104e940fc6f84f2bf410448a3b7671d56c
6
+ metadata.gz: de095d22fac5e124d9927178961aaf0c375e43020ffe23f448ae16f57b48725aea254076755c518588036ebfe66197ff31170e4ac08365b2d1608cc440a144af
7
+ data.tar.gz: 7f6dda2ab6190c1aa6527951406db4c57d58d2d8205bd5a13c61d50c2d0ed8c903a69d4dc2ccb805ba47d2b04c1cacc691dacef07f5c0228d83d5572e6fe805e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.5.2 (2026-04-06)
2
+
3
+ - Improved installation time
4
+ - Fixed unnecessary copying with `deserialize` method
5
+
1
6
  ## 0.5.1 (2025-10-26)
2
7
 
3
8
  - Fixed error with Rice 4.7
@@ -1,10 +1,10 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
2
3
  #include <string>
3
4
 
4
5
  #include <cpc_sketch.hpp>
5
6
  #include <cpc_union.hpp>
6
7
  #include <rice/rice.hpp>
7
- #include <rice/stl.hpp>
8
8
 
9
9
  using datasketches::cpc_sketch;
10
10
  using datasketches::cpc_union;
@@ -23,11 +23,11 @@ void init_cpc(Rice::Module& m) {
23
23
  "update",
24
24
  [](cpc_sketch& self, Rice::Object datum) {
25
25
  if (FIXNUM_P(datum.value())) {
26
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
26
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
27
27
  } else if (datum.is_a(rb_cNumeric)) {
28
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
28
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
29
29
  } else {
30
- return self.update(datum.to_s().str());
30
+ self.update(datum.to_s().str());
31
31
  }
32
32
  })
33
33
  .define_method(
@@ -40,19 +40,18 @@ void init_cpc(Rice::Module& m) {
40
40
  [](cpc_sketch& self) {
41
41
  std::ostringstream oss;
42
42
  self.serialize(oss);
43
- return oss.str();
43
+ return Rice::String(oss.str());
44
44
  })
45
45
  // TODO change to summary?
46
46
  .define_method(
47
47
  "to_string",
48
48
  [](cpc_sketch& self) {
49
- return self.to_string();
49
+ return Rice::String(self.to_string());
50
50
  })
51
51
  .define_singleton_function(
52
52
  "deserialize",
53
- [](const std::string& is) {
54
- std::istringstream iss(is);
55
- return cpc_sketch::deserialize(iss);
53
+ [](Rice::String is) {
54
+ return cpc_sketch::deserialize(is.c_str(), is.length());
56
55
  });
57
56
 
58
57
  Rice::define_class_under<cpc_union>(m, "CpcUnion")
@@ -27,19 +27,18 @@ void bind_fi_sketch(Rice::Module& m, const char* name) {
27
27
  [](datasketches::frequent_items_sketch<T>& self) {
28
28
  std::ostringstream oss;
29
29
  self.serialize(oss);
30
- return oss.str();
30
+ return Rice::String(oss.str());
31
31
  })
32
32
  // TODO change to summary?
33
33
  .define_method(
34
34
  "to_string",
35
35
  [](datasketches::frequent_items_sketch<T>& self) {
36
- return self.to_string();
36
+ return Rice::String(self.to_string());
37
37
  })
38
38
  .define_singleton_function(
39
39
  "deserialize",
40
- [](const std::string& is) {
41
- std::istringstream iss(is);
42
- return datasketches::frequent_items_sketch<T>::deserialize(iss);
40
+ [](Rice::String is) {
41
+ return datasketches::frequent_items_sketch<T>::deserialize(is.c_str(), is.length());
43
42
  });
44
43
  }
45
44
 
@@ -1,9 +1,9 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
2
3
  #include <string>
3
4
 
4
5
  #include <hll.hpp>
5
6
  #include <rice/rice.hpp>
6
- #include <rice/stl.hpp>
7
7
 
8
8
  using datasketches::hll_sketch;
9
9
  using datasketches::hll_union;
@@ -19,11 +19,11 @@ void init_hll(Rice::Module& m) {
19
19
  "update",
20
20
  [](hll_sketch& self, Rice::Object datum) {
21
21
  if (FIXNUM_P(datum.value())) {
22
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
22
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
23
23
  } else if (datum.is_a(rb_cNumeric)) {
24
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
24
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
25
25
  } else {
26
- return self.update(datum.to_s().str());
26
+ self.update(datum.to_s().str());
27
27
  }
28
28
  })
29
29
  .define_method(
@@ -36,26 +36,25 @@ void init_hll(Rice::Module& m) {
36
36
  [](hll_sketch& self) {
37
37
  std::ostringstream oss;
38
38
  self.serialize_compact(oss);
39
- return oss.str();
39
+ return Rice::String(oss.str());
40
40
  })
41
41
  .define_method(
42
42
  "serialize_updatable",
43
43
  [](hll_sketch& self) {
44
44
  std::ostringstream oss;
45
45
  self.serialize_updatable(oss);
46
- return oss.str();
46
+ return Rice::String(oss.str());
47
47
  })
48
48
  // TODO change to summary?
49
49
  .define_method(
50
50
  "to_string",
51
51
  [](hll_sketch& self) {
52
- return self.to_string();
52
+ return Rice::String(self.to_string());
53
53
  })
54
54
  .define_singleton_function(
55
55
  "deserialize",
56
- [](const std::string& is) {
57
- std::istringstream iss(is);
58
- return hll_sketch::deserialize(iss);
56
+ [](Rice::String is) {
57
+ return hll_sketch::deserialize(is.c_str(), is.length());
59
58
  });
60
59
 
61
60
  Rice::define_class_under<hll_union>(m, "HllUnion")
@@ -1,10 +1,10 @@
1
+ #include <cstdint>
1
2
  #include <sstream>
2
3
  #include <string>
3
4
  #include <vector>
4
5
 
5
6
  #include <kll_sketch.hpp>
6
7
  #include <rice/rice.hpp>
7
- #include <rice/stl.hpp>
8
8
 
9
9
  using datasketches::kll_sketch;
10
10
 
@@ -16,8 +16,8 @@ namespace Rice::detail {
16
16
 
17
17
  explicit To_Ruby(Arg* arg) : arg_(arg) { }
18
18
 
19
- VALUE convert(std::vector<T> const & x) {
20
- auto a = rb_ary_new2(x.size());
19
+ VALUE convert(const std::vector<T>& x) {
20
+ auto a = detail::protect(rb_ary_new2, x.size());
21
21
  for (const auto& v : x) {
22
22
  detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
23
23
  }
@@ -51,13 +51,15 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
51
51
  }, Rice::Arg("item"), Rice::Arg("inclusive")=false)
52
52
  .define_method(
53
53
  "pmf",
54
- [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
55
- return self.get_PMF(&split_points[0], split_points.size(), inclusive);
54
+ [](kll_sketch<T>& self, Rice::Array rb_split_points, bool inclusive) {
55
+ std::vector<T> split_points = rb_split_points.to_vector<T>();
56
+ return self.get_PMF(split_points.data(), split_points.size(), inclusive);
56
57
  }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
57
58
  .define_method(
58
59
  "cdf",
59
- [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
60
- return self.get_CDF(&split_points[0], split_points.size(), inclusive);
60
+ [](kll_sketch<T>& self, Rice::Array rb_split_points, bool inclusive) {
61
+ std::vector<T> split_points = rb_split_points.to_vector<T>();
62
+ return self.get_CDF(split_points.data(), split_points.size(), inclusive);
61
63
  }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
62
64
  .define_method(
63
65
  "merge",
@@ -74,19 +76,18 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
74
76
  [](kll_sketch<T>& self) {
75
77
  std::ostringstream oss;
76
78
  self.serialize(oss);
77
- return oss.str();
79
+ return Rice::String(oss.str());
78
80
  })
79
81
  // TODO change to summary?
80
82
  .define_method(
81
83
  "to_string",
82
84
  [](kll_sketch<T>& self) {
83
- return self.to_string();
85
+ return Rice::String(self.to_string());
84
86
  })
85
87
  .define_singleton_function(
86
88
  "deserialize",
87
- [](const std::string& is) {
88
- std::istringstream iss(is);
89
- return kll_sketch<T>::deserialize(iss);
89
+ [](Rice::String is) {
90
+ return kll_sketch<T>::deserialize(is.c_str(), is.length());
90
91
  });
91
92
  }
92
93
 
@@ -1,12 +1,11 @@
1
- #include <sstream>
1
+ #include <cstdint>
2
2
  #include <string>
3
3
 
4
4
  #include <rice/rice.hpp>
5
- #include <rice/stl.hpp>
5
+ #include <theta_a_not_b.hpp>
6
+ #include <theta_intersection.hpp>
6
7
  #include <theta_sketch.hpp>
7
8
  #include <theta_union.hpp>
8
- #include <theta_intersection.hpp>
9
- #include <theta_a_not_b.hpp>
10
9
 
11
10
  using datasketches::theta_sketch;
12
11
  using datasketches::update_theta_sketch;
@@ -43,9 +42,8 @@ void init_theta(Rice::Module& m) {
43
42
  Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
44
43
  .define_singleton_function(
45
44
  "deserialize",
46
- [](const std::string& is) {
47
- std::istringstream iss(is);
48
- return compact_theta_sketch::deserialize(iss);
45
+ [](Rice::String is) {
46
+ return compact_theta_sketch::deserialize(is.c_str(), is.length());
49
47
  });
50
48
 
51
49
  Rice::define_class_under<update_theta_sketch, theta_sketch>(m, "UpdateThetaSketch")
@@ -64,11 +62,11 @@ void init_theta(Rice::Module& m) {
64
62
  "update",
65
63
  [](update_theta_sketch& self, Rice::Object datum) {
66
64
  if (FIXNUM_P(datum.value())) {
67
- return self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
65
+ self.update(Rice::detail::From_Ruby<int64_t>().convert(datum));
68
66
  } else if (datum.is_a(rb_cNumeric)) {
69
- return self.update(Rice::detail::From_Ruby<double>().convert(datum));
67
+ self.update(Rice::detail::From_Ruby<double>().convert(datum));
70
68
  } else {
71
- return self.update(datum.to_s().str());
69
+ self.update(datum.to_s().str());
72
70
  }
73
71
  })
74
72
  .define_method(
@@ -80,14 +78,14 @@ void init_theta(Rice::Module& m) {
80
78
  Rice::define_class_under<theta_union>(m, "ThetaUnion")
81
79
  .define_singleton_function(
82
80
  "new",
83
- [](uint8_t lg_k, double p, uint64_t seed) {
81
+ [](uint8_t lg_k, float p, uint64_t seed) {
84
82
  theta_union::builder builder;
85
83
  builder.set_lg_k(lg_k);
86
84
  builder.set_p(p);
87
85
  builder.set_seed(seed);
88
86
  return builder.build();
89
87
  },
90
- Rice::Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Rice::Arg("p")=1.0, Rice::Arg("seed")=DEFAULT_SEED)
88
+ Rice::Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Rice::Arg("p")=1.0f, Rice::Arg("seed")=DEFAULT_SEED)
91
89
  .define_method("update", &theta_union::update<const theta_sketch&>)
92
90
  .define_method("result", &theta_union::get_result, Rice::Arg("ordered")=true);
93
91
 
@@ -1,7 +1,6 @@
1
- #include <sstream>
1
+ #include <cstdint>
2
2
 
3
3
  #include <rice/rice.hpp>
4
- #include <rice/stl.hpp>
5
4
  #include <var_opt_sketch.hpp>
6
5
 
7
6
  using datasketches::var_opt_sketch;
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -342,7 +342,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
342
342
  - !ruby/object:Gem::Version
343
343
  version: '0'
344
344
  requirements: []
345
- rubygems_version: 3.6.9
345
+ rubygems_version: 4.0.6
346
346
  specification_version: 4
347
347
  summary: Sketch data structures for Ruby
348
348
  test_files: []