datasketches 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +7 -7
  4. data/ext/datasketches/theta_wrapper.cpp +20 -4
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
  7. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  8. data/vendor/datasketches-cpp/README.md +76 -9
  9. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  10. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  11. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  12. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  13. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
  14. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  15. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  16. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
  17. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  18. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  20. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
  21. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  22. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  24. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
  25. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
  26. data/vendor/datasketches-cpp/setup.py +1 -1
  27. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  28. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
  29. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  30. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  31. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  32. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  33. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
  34. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  35. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  36. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
  37. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  38. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  39. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
  40. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  41. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  42. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  43. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  44. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  46. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  47. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
  48. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
  49. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  50. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  51. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  52. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  53. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  54. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  55. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
  56. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  57. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  58. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  59. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  60. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
  61. metadata +11 -6
  62. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  63. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -20,8 +20,8 @@
20
20
  namespace datasketches {
21
21
 
22
22
  template<typename A>
23
- array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
24
- Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
23
+ array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
24
+ Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
25
25
  {}
26
26
 
27
27
  template<typename A>
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
37
37
 
38
38
  template<typename A>
39
39
  array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
40
- return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
40
+ return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
41
41
  }
42
42
 
43
43
  } /* namespace datasketches */
@@ -153,8 +153,7 @@ public:
153
153
  virtual const_iterator end() const = 0;
154
154
 
155
155
  protected:
156
- using ostrstream = std::basic_ostringstream<char, std::char_traits<char>, AllocChar<Allocator>>;
157
- virtual void print_specifics(ostrstream& os) const = 0;
156
+ virtual void print_specifics(std::ostringstream& os) const = 0;
158
157
 
159
158
  static uint16_t get_seed_hash(uint64_t seed);
160
159
 
@@ -325,6 +324,11 @@ public:
325
324
  */
326
325
  void trim();
327
326
 
327
+ /**
328
+ * Reset the sketch to the initial empty state
329
+ */
330
+ void reset();
331
+
328
332
  /**
329
333
  * Converts this sketch to a compact sketch (ordered or unordered).
330
334
  * @param ordered optional flag to specify if ordered sketch should be produced
@@ -342,10 +346,9 @@ protected:
342
346
  tuple_map map_;
343
347
 
344
348
  // for builder
345
- update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
349
+ update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
346
350
 
347
- using ostrstream = typename Base::ostrstream;
348
- virtual void print_specifics(ostrstream& os) const;
351
+ virtual void print_specifics(std::ostringstream& os) const;
349
352
  };
350
353
 
351
354
  // compact sketch
@@ -367,9 +370,11 @@ public:
367
370
  using vector_bytes = std::vector<uint8_t, AllocBytes>;
368
371
  using comparator = compare_by_key<ExtractKey>;
369
372
 
370
- static const uint8_t SERIAL_VERSION = 1;
373
+ static const uint8_t SERIAL_VERSION_LEGACY = 1;
374
+ static const uint8_t SERIAL_VERSION = 3;
371
375
  static const uint8_t SKETCH_FAMILY = 9;
372
- static const uint8_t SKETCH_TYPE = 5;
376
+ static const uint8_t SKETCH_TYPE = 1;
377
+ static const uint8_t SKETCH_TYPE_LEGACY = 5;
373
378
  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
374
379
 
375
380
  // Instances of this type can be obtained:
@@ -473,8 +478,7 @@ protected:
473
478
  bool destroy_;
474
479
  };
475
480
 
476
- using ostrstream = typename Base::ostrstream;
477
- virtual void print_specifics(ostrstream& os) const;
481
+ virtual void print_specifics(std::ostringstream& os) const;
478
482
 
479
483
  };
480
484
 
@@ -53,7 +53,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
53
53
 
54
54
  template<typename S, typename A>
55
55
  string<A> tuple_sketch<S, A>::to_string(bool detail) const {
56
- ostrstream os;
56
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
57
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
58
+ std::ostringstream os;
57
59
  os << "### Tuple sketch summary:" << std::endl;
58
60
  os << " num retained entries : " << get_num_retained() << std::endl;
59
61
  os << " seed hash : " << get_seed_hash() << std::endl;
@@ -74,15 +76,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
74
76
  }
75
77
  os << "### End retained entries" << std::endl;
76
78
  }
77
- return os.str();
79
+ return string<A>(os.str().c_str(), get_allocator());
78
80
  }
79
81
 
80
82
  // update sketch
81
83
 
82
84
  template<typename S, typename U, typename P, typename A>
83
- update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
85
+ update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
84
86
  policy_(policy),
85
- map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
87
+ map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
86
88
  {}
87
89
 
88
90
  template<typename S, typename U, typename P, typename A>
@@ -97,12 +99,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
97
99
 
98
100
  template<typename S, typename U, typename P, typename A>
99
101
  bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
100
- return false;
102
+ return map_.num_entries_ > 1 ? false : true;;
101
103
  }
102
104
 
103
105
  template<typename S, typename U, typename P, typename A>
104
106
  uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
105
- return map_.theta_;
107
+ return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
106
108
  }
107
109
 
108
110
  template<typename S, typename U, typename P, typename A>
@@ -212,6 +214,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
212
214
  map_.trim();
213
215
  }
214
216
 
217
+ template<typename S, typename U, typename P, typename A>
218
+ void update_tuple_sketch<S, U, P, A>::reset() {
219
+ map_.reset();
220
+ }
221
+
215
222
  template<typename S, typename U, typename P, typename A>
216
223
  auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
217
224
  return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
@@ -238,7 +245,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
238
245
  }
239
246
 
240
247
  template<typename S, typename U, typename P, typename A>
241
- void update_tuple_sketch<S, U, P, A>::print_specifics(ostrstream& os) const {
248
+ void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
242
249
  os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
243
250
  os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
244
251
  os << " resize factor : " << (1 << map_.rf_) << std::endl;
@@ -250,7 +257,7 @@ template<typename S, typename A>
250
257
  compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
251
258
  std::vector<Entry, AllocEntry>&& entries):
252
259
  is_empty_(is_empty),
253
- is_ordered_(is_ordered),
260
+ is_ordered_(is_ordered || (entries.size() <= 1ULL)),
254
261
  seed_hash_(seed_hash),
255
262
  theta_(theta),
256
263
  entries_(std::move(entries))
@@ -437,9 +444,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
437
444
  read<uint8_t>(is); // unused
438
445
  const auto flags_byte = read<uint8_t>(is);
439
446
  const auto seed_hash = read<uint16_t>(is);
440
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
447
+ if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
448
+ throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
449
+ + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
450
+ }
441
451
  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
442
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
+ if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
453
+ throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
454
+ + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
455
+ }
443
456
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
444
457
  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
445
458
 
@@ -492,9 +505,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
492
505
  ptr += copy_from_mem(ptr, flags_byte);
493
506
  uint16_t seed_hash;
494
507
  ptr += copy_from_mem(ptr, seed_hash);
495
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
508
+ if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
509
+ throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
510
+ + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
511
+ }
496
512
  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
497
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
513
+ if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
514
+ throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
515
+ + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
516
+ }
498
517
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
499
518
  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
500
519
 
@@ -554,7 +573,7 @@ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
554
573
  }
555
574
 
556
575
  template<typename S, typename A>
557
- void compact_tuple_sketch<S, A>::print_specifics(ostrstream&) const {}
576
+ void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
558
577
 
559
578
  // builder
560
579
 
@@ -568,7 +587,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
568
587
 
569
588
  template<typename S, typename U, typename P, typename A>
570
589
  auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
571
- return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
590
+ return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
572
591
  }
573
592
 
574
593
  } /* namespace datasketches */
@@ -80,11 +80,16 @@ public:
80
80
  */
81
81
  CompactSketch get_result(bool ordered = true) const;
82
82
 
83
+ /**
84
+ * Reset the union to the initial empty state
85
+ */
86
+ void reset();
87
+
83
88
  protected:
84
89
  State state_;
85
90
 
86
91
  // for builder
87
- tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
92
+ tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
88
93
  };
89
94
 
90
95
  template<typename S, typename P, typename A>
@@ -20,8 +20,8 @@
20
20
  namespace datasketches {
21
21
 
22
22
  template<typename S, typename P, typename A>
23
- tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
24
- state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
23
+ tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
24
+ state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator)
25
25
  {}
26
26
 
27
27
  template<typename S, typename P, typename A>
@@ -35,13 +35,18 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
35
35
  return state_.get_result(ordered);
36
36
  }
37
37
 
38
+ template<typename S, typename P, typename A>
39
+ void tuple_union<S, P, A>::reset() {
40
+ return state_.reset();
41
+ }
42
+
38
43
  template<typename S, typename P, typename A>
39
44
  tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
40
45
  tuple_base_builder<builder, P, A>(policy, allocator) {}
41
46
 
42
47
  template<typename S, typename P, typename A>
43
48
  auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
44
- return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
49
+ return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
45
50
  }
46
51
 
47
52
  } /* namespace datasketches */
@@ -36,6 +36,17 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
36
36
  const std::string inputPath = "test/";
37
37
  #endif
38
38
 
39
+ TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
40
+ auto update_sketch = update_array_of_doubles_sketch::builder().build();
41
+ std::vector<double> a = {1};
42
+ update_sketch.update(1, a);
43
+ REQUIRE(!update_sketch.is_empty());
44
+ REQUIRE(update_sketch.get_num_retained() == 1);
45
+ update_sketch.reset();
46
+ REQUIRE(update_sketch.is_empty());
47
+ REQUIRE(update_sketch.get_num_retained() == 0);
48
+ }
49
+
39
50
  TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
40
51
  auto update_sketch = update_array_of_doubles_sketch::builder().build();
41
52
  REQUIRE(update_sketch.is_empty());
@@ -263,6 +274,11 @@ TEST_CASE("aod union: half overlap", "[tuple_sketch]") {
263
274
  u.update(update_sketch2);
264
275
  auto result = u.get_result();
265
276
  REQUIRE(result.get_estimate() == Approx(1500).margin(0.01));
277
+
278
+ u.reset();
279
+ result = u.get_result();
280
+ REQUIRE(result.is_empty());
281
+ REQUIRE(result.get_num_retained() == 0);
266
282
  }
267
283
 
268
284
  TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
@@ -34,7 +34,6 @@ std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) {
34
34
 
35
35
  #include <catch.hpp>
36
36
  #include <tuple_sketch.hpp>
37
- //#include <test_type.hpp>
38
37
 
39
38
  namespace datasketches {
40
39
 
@@ -43,9 +42,11 @@ TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
43
42
  builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
44
43
  auto sketch = builder.build();
45
44
  REQUIRE(sketch.get_lg_k() == 10);
46
- REQUIRE(sketch.get_theta() == 0.5);
45
+ REQUIRE(sketch.get_theta() == 1.0); // empty sketch should have theta 1.0
47
46
  REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2);
48
47
  REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123));
48
+ sketch.update(1, 0);
49
+ REQUIRE(sketch.get_theta() == 0.5); // theta = p
49
50
  }
50
51
 
51
52
  TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
@@ -58,7 +59,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
58
59
  REQUIRE(update_sketch.get_upper_bound(1) == 0);
59
60
  REQUIRE(update_sketch.get_theta() == 1);
60
61
  REQUIRE(update_sketch.get_num_retained() == 0);
61
- REQUIRE(!update_sketch.is_ordered());
62
+ REQUIRE(update_sketch.is_ordered());
62
63
 
63
64
  auto compact_sketch = update_sketch.compact();
64
65
  std::cout << "sizeof(compact_tuple_sketch<float>)=" << sizeof(compact_sketch) << std::endl;
@@ -70,6 +71,33 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
70
71
  REQUIRE(compact_sketch.get_theta() == 1);
71
72
  REQUIRE(compact_sketch.get_num_retained() == 0);
72
73
  REQUIRE(compact_sketch.is_ordered());
74
+
75
+ // empty is forced to be ordered
76
+ REQUIRE(update_sketch.compact(false).is_ordered());
77
+ }
78
+
79
+ TEST_CASE("tuple sketch: single item", "[theta_sketch]") {
80
+ auto update_sketch = update_tuple_sketch<float>::builder().build();
81
+ update_sketch.update(1, 1.0f);
82
+ REQUIRE_FALSE(update_sketch.is_empty());
83
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
84
+ REQUIRE(update_sketch.get_theta() == 1.0);
85
+ REQUIRE(update_sketch.get_estimate() == 1.0);
86
+ REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
87
+ REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
88
+ REQUIRE(update_sketch.is_ordered()); // one item is ordered
89
+
90
+ auto compact_sketch = update_sketch.compact();
91
+ REQUIRE_FALSE(compact_sketch.is_empty());
92
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
93
+ REQUIRE(compact_sketch.get_theta() == 1.0);
94
+ REQUIRE(compact_sketch.get_estimate() == 1.0);
95
+ REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
96
+ REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
97
+ REQUIRE(compact_sketch.is_ordered());
98
+
99
+ // single item is forced to be ordered
100
+ REQUIRE(update_sketch.compact(false).is_ordered());
73
101
  }
74
102
 
75
103
  TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
@@ -78,14 +106,14 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
78
106
  update_sketch.update(2, 2.0f);
79
107
  update_sketch.update(1, 1.0f);
80
108
  // std::cout << update_sketch.to_string(true);
81
- REQUIRE(!update_sketch.is_empty());
82
- REQUIRE(!update_sketch.is_estimation_mode());
109
+ REQUIRE_FALSE(update_sketch.is_empty());
110
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
83
111
  REQUIRE(update_sketch.get_estimate() == 2);
84
112
  REQUIRE(update_sketch.get_lower_bound(1) == 2);
85
113
  REQUIRE(update_sketch.get_upper_bound(1) == 2);
86
114
  REQUIRE(update_sketch.get_theta() == 1);
87
115
  REQUIRE(update_sketch.get_num_retained() == 2);
88
- REQUIRE(!update_sketch.is_ordered());
116
+ REQUIRE_FALSE(update_sketch.is_ordered());
89
117
  int count = 0;
90
118
  for (const auto& entry: update_sketch) {
91
119
  REQUIRE(entry.second == 2);
@@ -95,8 +123,8 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
95
123
 
96
124
  auto compact_sketch = update_sketch.compact();
97
125
  // std::cout << compact_sketch.to_string(true);
98
- REQUIRE(!compact_sketch.is_empty());
99
- REQUIRE(!compact_sketch.is_estimation_mode());
126
+ REQUIRE_FALSE(compact_sketch.is_empty());
127
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
100
128
  REQUIRE(compact_sketch.get_estimate() == 2);
101
129
  REQUIRE(compact_sketch.get_lower_bound(1) == 2);
102
130
  REQUIRE(compact_sketch.get_upper_bound(1) == 2);
@@ -151,6 +179,16 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
151
179
  ++it;
152
180
  }
153
181
  }
182
+
183
+ update_sketch.reset();
184
+ REQUIRE(update_sketch.is_empty());
185
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
186
+ REQUIRE(update_sketch.get_estimate() == 0);
187
+ REQUIRE(update_sketch.get_lower_bound(1) == 0);
188
+ REQUIRE(update_sketch.get_upper_bound(1) == 0);
189
+ REQUIRE(update_sketch.get_theta() == 1);
190
+ REQUIRE(update_sketch.get_num_retained() == 0);
191
+ REQUIRE(update_sketch.is_ordered());
154
192
  }
155
193
 
156
194
  template<typename T>
@@ -81,6 +81,13 @@ TEST_CASE("tuple_union float: simple case", "[tuple union]") {
81
81
  u.update(update_sketch2);
82
82
  auto result = u.get_result();
83
83
  REQUIRE(result.get_num_retained() == 3);
84
+
85
+ u.reset();
86
+ result = u.get_result();
87
+ REQUIRE(result.is_empty());
88
+ REQUIRE(result.get_num_retained() == 0);
89
+ REQUIRE(!result.is_estimation_mode());
90
+ REQUIRE(result.get_estimate() == 0);
84
91
  }
85
92
 
86
93
  TEST_CASE("tuple_union float: exact mode half overlap", "[tuple union]") {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-29 00:00:00.000000000 Z
11
+ date: 2021-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -51,6 +51,7 @@ files:
51
51
  - vendor/datasketches-cpp/MANIFEST.in
52
52
  - vendor/datasketches-cpp/NOTICE
53
53
  - vendor/datasketches-cpp/README.md
54
+ - vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
54
55
  - vendor/datasketches-cpp/common/CMakeLists.txt
55
56
  - vendor/datasketches-cpp/common/include/MurmurHash3.h
56
57
  - vendor/datasketches-cpp/common/include/binomial_bounds.hpp
@@ -256,14 +257,18 @@ files:
256
257
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
257
258
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
258
259
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
260
+ - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
261
+ - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
259
262
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
263
+ - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
264
+ - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
265
+ - vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
260
266
  - vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
261
267
  - vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
262
268
  - vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
269
+ - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
263
270
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
264
271
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
265
- - vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk
266
- - vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk
267
272
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
268
273
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
269
274
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
@@ -295,7 +300,7 @@ files:
295
300
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
296
301
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
297
302
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
298
- homepage: https://github.com/ankane/datasketches
303
+ homepage: https://github.com/ankane/datasketches-ruby
299
304
  licenses:
300
305
  - Apache-2.0
301
306
  metadata: {}
@@ -314,7 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
314
319
  - !ruby/object:Gem::Version
315
320
  version: '0'
316
321
  requirements: []
317
- rubygems_version: 3.2.22
322
+ rubygems_version: 3.3.3
318
323
  signing_key:
319
324
  specification_version: 4
320
325
  summary: Sketch data structures for Ruby