datasketches 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +7 -7
  4. data/ext/datasketches/theta_wrapper.cpp +20 -4
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
  7. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  8. data/vendor/datasketches-cpp/README.md +76 -9
  9. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  10. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  11. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  12. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  13. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
  14. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  15. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  16. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
  17. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  18. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  20. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
  21. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  22. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  24. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
  25. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
  26. data/vendor/datasketches-cpp/setup.py +1 -1
  27. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  28. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
  29. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  30. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  31. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  32. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  33. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
  34. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  35. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  36. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
  37. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  38. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  39. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
  40. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  41. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  42. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  43. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  44. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  46. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  47. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
  48. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
  49. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  50. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  51. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  52. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  53. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  54. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  55. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
  56. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  57. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  58. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  59. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  60. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
  61. metadata +11 -6
  62. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  63. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -20,8 +20,8 @@
20
20
  namespace datasketches {
21
21
 
22
22
  template<typename A>
23
- array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
24
- Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
23
+ array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
24
+ Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
25
25
  {}
26
26
 
27
27
  template<typename A>
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
37
37
 
38
38
  template<typename A>
39
39
  array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
40
- return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
40
+ return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
41
41
  }
42
42
 
43
43
  } /* namespace datasketches */
@@ -153,8 +153,7 @@ public:
153
153
  virtual const_iterator end() const = 0;
154
154
 
155
155
  protected:
156
- using ostrstream = std::basic_ostringstream<char, std::char_traits<char>, AllocChar<Allocator>>;
157
- virtual void print_specifics(ostrstream& os) const = 0;
156
+ virtual void print_specifics(std::ostringstream& os) const = 0;
158
157
 
159
158
  static uint16_t get_seed_hash(uint64_t seed);
160
159
 
@@ -325,6 +324,11 @@ public:
325
324
  */
326
325
  void trim();
327
326
 
327
+ /**
328
+ * Reset the sketch to the initial empty state
329
+ */
330
+ void reset();
331
+
328
332
  /**
329
333
  * Converts this sketch to a compact sketch (ordered or unordered).
330
334
  * @param ordered optional flag to specify if ordered sketch should be produced
@@ -342,10 +346,9 @@ protected:
342
346
  tuple_map map_;
343
347
 
344
348
  // for builder
345
- update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
349
+ update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
346
350
 
347
- using ostrstream = typename Base::ostrstream;
348
- virtual void print_specifics(ostrstream& os) const;
351
+ virtual void print_specifics(std::ostringstream& os) const;
349
352
  };
350
353
 
351
354
  // compact sketch
@@ -367,9 +370,11 @@ public:
367
370
  using vector_bytes = std::vector<uint8_t, AllocBytes>;
368
371
  using comparator = compare_by_key<ExtractKey>;
369
372
 
370
- static const uint8_t SERIAL_VERSION = 1;
373
+ static const uint8_t SERIAL_VERSION_LEGACY = 1;
374
+ static const uint8_t SERIAL_VERSION = 3;
371
375
  static const uint8_t SKETCH_FAMILY = 9;
372
- static const uint8_t SKETCH_TYPE = 5;
376
+ static const uint8_t SKETCH_TYPE = 1;
377
+ static const uint8_t SKETCH_TYPE_LEGACY = 5;
373
378
  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
374
379
 
375
380
  // Instances of this type can be obtained:
@@ -473,8 +478,7 @@ protected:
473
478
  bool destroy_;
474
479
  };
475
480
 
476
- using ostrstream = typename Base::ostrstream;
477
- virtual void print_specifics(ostrstream& os) const;
481
+ virtual void print_specifics(std::ostringstream& os) const;
478
482
 
479
483
  };
480
484
 
@@ -53,7 +53,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
53
53
 
54
54
  template<typename S, typename A>
55
55
  string<A> tuple_sketch<S, A>::to_string(bool detail) const {
56
- ostrstream os;
56
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
57
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
58
+ std::ostringstream os;
57
59
  os << "### Tuple sketch summary:" << std::endl;
58
60
  os << " num retained entries : " << get_num_retained() << std::endl;
59
61
  os << " seed hash : " << get_seed_hash() << std::endl;
@@ -74,15 +76,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
74
76
  }
75
77
  os << "### End retained entries" << std::endl;
76
78
  }
77
- return os.str();
79
+ return string<A>(os.str().c_str(), get_allocator());
78
80
  }
79
81
 
80
82
  // update sketch
81
83
 
82
84
  template<typename S, typename U, typename P, typename A>
83
- update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
85
+ update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
84
86
  policy_(policy),
85
- map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
87
+ map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
86
88
  {}
87
89
 
88
90
  template<typename S, typename U, typename P, typename A>
@@ -97,12 +99,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
97
99
 
98
100
  template<typename S, typename U, typename P, typename A>
99
101
  bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
100
- return false;
102
+ return map_.num_entries_ > 1 ? false : true;;
101
103
  }
102
104
 
103
105
  template<typename S, typename U, typename P, typename A>
104
106
  uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
105
- return map_.theta_;
107
+ return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
106
108
  }
107
109
 
108
110
  template<typename S, typename U, typename P, typename A>
@@ -212,6 +214,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
212
214
  map_.trim();
213
215
  }
214
216
 
217
+ template<typename S, typename U, typename P, typename A>
218
+ void update_tuple_sketch<S, U, P, A>::reset() {
219
+ map_.reset();
220
+ }
221
+
215
222
  template<typename S, typename U, typename P, typename A>
216
223
  auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
217
224
  return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
@@ -238,7 +245,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
238
245
  }
239
246
 
240
247
  template<typename S, typename U, typename P, typename A>
241
- void update_tuple_sketch<S, U, P, A>::print_specifics(ostrstream& os) const {
248
+ void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
242
249
  os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
243
250
  os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
244
251
  os << " resize factor : " << (1 << map_.rf_) << std::endl;
@@ -250,7 +257,7 @@ template<typename S, typename A>
250
257
  compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
251
258
  std::vector<Entry, AllocEntry>&& entries):
252
259
  is_empty_(is_empty),
253
- is_ordered_(is_ordered),
260
+ is_ordered_(is_ordered || (entries.size() <= 1ULL)),
254
261
  seed_hash_(seed_hash),
255
262
  theta_(theta),
256
263
  entries_(std::move(entries))
@@ -437,9 +444,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
437
444
  read<uint8_t>(is); // unused
438
445
  const auto flags_byte = read<uint8_t>(is);
439
446
  const auto seed_hash = read<uint16_t>(is);
440
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
447
+ if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
448
+ throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
449
+ + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
450
+ }
441
451
  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
442
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
+ if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
453
+ throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
454
+ + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
455
+ }
443
456
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
444
457
  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
445
458
 
@@ -492,9 +505,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
492
505
  ptr += copy_from_mem(ptr, flags_byte);
493
506
  uint16_t seed_hash;
494
507
  ptr += copy_from_mem(ptr, seed_hash);
495
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
508
+ if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
509
+ throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
510
+ + std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
511
+ }
496
512
  checker<true>::check_sketch_family(family, SKETCH_FAMILY);
497
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
513
+ if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
514
+ throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
515
+ + std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
516
+ }
498
517
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
499
518
  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
500
519
 
@@ -554,7 +573,7 @@ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
554
573
  }
555
574
 
556
575
  template<typename S, typename A>
557
- void compact_tuple_sketch<S, A>::print_specifics(ostrstream&) const {}
576
+ void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
558
577
 
559
578
  // builder
560
579
 
@@ -568,7 +587,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
568
587
 
569
588
  template<typename S, typename U, typename P, typename A>
570
589
  auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
571
- return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
590
+ return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
572
591
  }
573
592
 
574
593
  } /* namespace datasketches */
@@ -80,11 +80,16 @@ public:
80
80
  */
81
81
  CompactSketch get_result(bool ordered = true) const;
82
82
 
83
+ /**
84
+ * Reset the union to the initial empty state
85
+ */
86
+ void reset();
87
+
83
88
  protected:
84
89
  State state_;
85
90
 
86
91
  // for builder
87
- tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
92
+ tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
88
93
  };
89
94
 
90
95
  template<typename S, typename P, typename A>
@@ -20,8 +20,8 @@
20
20
  namespace datasketches {
21
21
 
22
22
  template<typename S, typename P, typename A>
23
- tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
24
- state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
23
+ tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
24
+ state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator)
25
25
  {}
26
26
 
27
27
  template<typename S, typename P, typename A>
@@ -35,13 +35,18 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
35
35
  return state_.get_result(ordered);
36
36
  }
37
37
 
38
+ template<typename S, typename P, typename A>
39
+ void tuple_union<S, P, A>::reset() {
40
+ return state_.reset();
41
+ }
42
+
38
43
  template<typename S, typename P, typename A>
39
44
  tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
40
45
  tuple_base_builder<builder, P, A>(policy, allocator) {}
41
46
 
42
47
  template<typename S, typename P, typename A>
43
48
  auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
44
- return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
49
+ return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
45
50
  }
46
51
 
47
52
  } /* namespace datasketches */
@@ -36,6 +36,17 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
36
36
  const std::string inputPath = "test/";
37
37
  #endif
38
38
 
39
+ TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
40
+ auto update_sketch = update_array_of_doubles_sketch::builder().build();
41
+ std::vector<double> a = {1};
42
+ update_sketch.update(1, a);
43
+ REQUIRE(!update_sketch.is_empty());
44
+ REQUIRE(update_sketch.get_num_retained() == 1);
45
+ update_sketch.reset();
46
+ REQUIRE(update_sketch.is_empty());
47
+ REQUIRE(update_sketch.get_num_retained() == 0);
48
+ }
49
+
39
50
  TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
40
51
  auto update_sketch = update_array_of_doubles_sketch::builder().build();
41
52
  REQUIRE(update_sketch.is_empty());
@@ -263,6 +274,11 @@ TEST_CASE("aod union: half overlap", "[tuple_sketch]") {
263
274
  u.update(update_sketch2);
264
275
  auto result = u.get_result();
265
276
  REQUIRE(result.get_estimate() == Approx(1500).margin(0.01));
277
+
278
+ u.reset();
279
+ result = u.get_result();
280
+ REQUIRE(result.is_empty());
281
+ REQUIRE(result.get_num_retained() == 0);
266
282
  }
267
283
 
268
284
  TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
@@ -34,7 +34,6 @@ std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) {
34
34
 
35
35
  #include <catch.hpp>
36
36
  #include <tuple_sketch.hpp>
37
- //#include <test_type.hpp>
38
37
 
39
38
  namespace datasketches {
40
39
 
@@ -43,9 +42,11 @@ TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
43
42
  builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
44
43
  auto sketch = builder.build();
45
44
  REQUIRE(sketch.get_lg_k() == 10);
46
- REQUIRE(sketch.get_theta() == 0.5);
45
+ REQUIRE(sketch.get_theta() == 1.0); // empty sketch should have theta 1.0
47
46
  REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2);
48
47
  REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123));
48
+ sketch.update(1, 0);
49
+ REQUIRE(sketch.get_theta() == 0.5); // theta = p
49
50
  }
50
51
 
51
52
  TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
@@ -58,7 +59,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
58
59
  REQUIRE(update_sketch.get_upper_bound(1) == 0);
59
60
  REQUIRE(update_sketch.get_theta() == 1);
60
61
  REQUIRE(update_sketch.get_num_retained() == 0);
61
- REQUIRE(!update_sketch.is_ordered());
62
+ REQUIRE(update_sketch.is_ordered());
62
63
 
63
64
  auto compact_sketch = update_sketch.compact();
64
65
  std::cout << "sizeof(compact_tuple_sketch<float>)=" << sizeof(compact_sketch) << std::endl;
@@ -70,6 +71,33 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
70
71
  REQUIRE(compact_sketch.get_theta() == 1);
71
72
  REQUIRE(compact_sketch.get_num_retained() == 0);
72
73
  REQUIRE(compact_sketch.is_ordered());
74
+
75
+ // empty is forced to be ordered
76
+ REQUIRE(update_sketch.compact(false).is_ordered());
77
+ }
78
+
79
+ TEST_CASE("tuple sketch: single item", "[theta_sketch]") {
80
+ auto update_sketch = update_tuple_sketch<float>::builder().build();
81
+ update_sketch.update(1, 1.0f);
82
+ REQUIRE_FALSE(update_sketch.is_empty());
83
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
84
+ REQUIRE(update_sketch.get_theta() == 1.0);
85
+ REQUIRE(update_sketch.get_estimate() == 1.0);
86
+ REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
87
+ REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
88
+ REQUIRE(update_sketch.is_ordered()); // one item is ordered
89
+
90
+ auto compact_sketch = update_sketch.compact();
91
+ REQUIRE_FALSE(compact_sketch.is_empty());
92
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
93
+ REQUIRE(compact_sketch.get_theta() == 1.0);
94
+ REQUIRE(compact_sketch.get_estimate() == 1.0);
95
+ REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
96
+ REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
97
+ REQUIRE(compact_sketch.is_ordered());
98
+
99
+ // single item is forced to be ordered
100
+ REQUIRE(update_sketch.compact(false).is_ordered());
73
101
  }
74
102
 
75
103
  TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
@@ -78,14 +106,14 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
78
106
  update_sketch.update(2, 2.0f);
79
107
  update_sketch.update(1, 1.0f);
80
108
  // std::cout << update_sketch.to_string(true);
81
- REQUIRE(!update_sketch.is_empty());
82
- REQUIRE(!update_sketch.is_estimation_mode());
109
+ REQUIRE_FALSE(update_sketch.is_empty());
110
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
83
111
  REQUIRE(update_sketch.get_estimate() == 2);
84
112
  REQUIRE(update_sketch.get_lower_bound(1) == 2);
85
113
  REQUIRE(update_sketch.get_upper_bound(1) == 2);
86
114
  REQUIRE(update_sketch.get_theta() == 1);
87
115
  REQUIRE(update_sketch.get_num_retained() == 2);
88
- REQUIRE(!update_sketch.is_ordered());
116
+ REQUIRE_FALSE(update_sketch.is_ordered());
89
117
  int count = 0;
90
118
  for (const auto& entry: update_sketch) {
91
119
  REQUIRE(entry.second == 2);
@@ -95,8 +123,8 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
95
123
 
96
124
  auto compact_sketch = update_sketch.compact();
97
125
  // std::cout << compact_sketch.to_string(true);
98
- REQUIRE(!compact_sketch.is_empty());
99
- REQUIRE(!compact_sketch.is_estimation_mode());
126
+ REQUIRE_FALSE(compact_sketch.is_empty());
127
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode());
100
128
  REQUIRE(compact_sketch.get_estimate() == 2);
101
129
  REQUIRE(compact_sketch.get_lower_bound(1) == 2);
102
130
  REQUIRE(compact_sketch.get_upper_bound(1) == 2);
@@ -151,6 +179,16 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
151
179
  ++it;
152
180
  }
153
181
  }
182
+
183
+ update_sketch.reset();
184
+ REQUIRE(update_sketch.is_empty());
185
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
186
+ REQUIRE(update_sketch.get_estimate() == 0);
187
+ REQUIRE(update_sketch.get_lower_bound(1) == 0);
188
+ REQUIRE(update_sketch.get_upper_bound(1) == 0);
189
+ REQUIRE(update_sketch.get_theta() == 1);
190
+ REQUIRE(update_sketch.get_num_retained() == 0);
191
+ REQUIRE(update_sketch.is_ordered());
154
192
  }
155
193
 
156
194
  template<typename T>
@@ -81,6 +81,13 @@ TEST_CASE("tuple_union float: simple case", "[tuple union]") {
81
81
  u.update(update_sketch2);
82
82
  auto result = u.get_result();
83
83
  REQUIRE(result.get_num_retained() == 3);
84
+
85
+ u.reset();
86
+ result = u.get_result();
87
+ REQUIRE(result.is_empty());
88
+ REQUIRE(result.get_num_retained() == 0);
89
+ REQUIRE(!result.is_estimation_mode());
90
+ REQUIRE(result.get_estimate() == 0);
84
91
  }
85
92
 
86
93
  TEST_CASE("tuple_union float: exact mode half overlap", "[tuple union]") {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-29 00:00:00.000000000 Z
11
+ date: 2021-12-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -51,6 +51,7 @@ files:
51
51
  - vendor/datasketches-cpp/MANIFEST.in
52
52
  - vendor/datasketches-cpp/NOTICE
53
53
  - vendor/datasketches-cpp/README.md
54
+ - vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
54
55
  - vendor/datasketches-cpp/common/CMakeLists.txt
55
56
  - vendor/datasketches-cpp/common/include/MurmurHash3.h
56
57
  - vendor/datasketches-cpp/common/include/binomial_bounds.hpp
@@ -256,14 +257,18 @@ files:
256
257
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
257
258
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
258
259
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
260
+ - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
261
+ - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
259
262
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
263
+ - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
264
+ - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
265
+ - vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
260
266
  - vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
261
267
  - vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
262
268
  - vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
269
+ - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
263
270
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
264
271
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
265
- - vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk
266
- - vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk
267
272
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
268
273
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
269
274
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
@@ -295,7 +300,7 @@ files:
295
300
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
296
301
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
297
302
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
298
- homepage: https://github.com/ankane/datasketches
303
+ homepage: https://github.com/ankane/datasketches-ruby
299
304
  licenses:
300
305
  - Apache-2.0
301
306
  metadata: {}
@@ -314,7 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
314
319
  - !ruby/object:Gem::Version
315
320
  version: '0'
316
321
  requirements: []
317
- rubygems_version: 3.2.22
322
+ rubygems_version: 3.3.3
318
323
  signing_key:
319
324
  specification_version: 4
320
325
  summary: Sketch data structures for Ruby