roaring 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +7 -0
- data/README.md +16 -11
- data/Rakefile +12 -7
- data/benchmark/to_a.rb +15 -0
- data/benchmark/to_set.rb +15 -0
- data/ext/roaring/bitmap32.c +139 -5
- data/ext/roaring/bitmap64.c +86 -7
- data/ext/roaring/roaring.c +5610 -5501
- data/ext/roaring/roaring.h +226 -162
- data/lib/roaring/version.rb +1 -1
- data/lib/roaring.rb +94 -38
- metadata +6 -3
data/ext/roaring/roaring.h
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!!
|
|
2
|
-
// Created by amalgamation.sh on 2024-
|
|
2
|
+
// Created by amalgamation.sh on 2024-09-21T20:01:13Z
|
|
3
3
|
|
|
4
4
|
/*
|
|
5
5
|
* The CRoaring project is under a dual license (Apache/MIT).
|
|
@@ -59,165 +59,14 @@
|
|
|
59
59
|
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
|
|
60
60
|
#ifndef ROARING_INCLUDE_ROARING_VERSION
|
|
61
61
|
#define ROARING_INCLUDE_ROARING_VERSION
|
|
62
|
-
#define ROARING_VERSION "4.
|
|
62
|
+
#define ROARING_VERSION "4.1.7"
|
|
63
63
|
enum {
|
|
64
64
|
ROARING_VERSION_MAJOR = 4,
|
|
65
|
-
ROARING_VERSION_MINOR =
|
|
66
|
-
ROARING_VERSION_REVISION =
|
|
65
|
+
ROARING_VERSION_MINOR = 1,
|
|
66
|
+
ROARING_VERSION_REVISION = 7
|
|
67
67
|
};
|
|
68
68
|
#endif // ROARING_INCLUDE_ROARING_VERSION
|
|
69
69
|
// clang-format on/* end file include/roaring/roaring_version.h */
|
|
70
|
-
/* begin file include/roaring/roaring_types.h */
|
|
71
|
-
/*
|
|
72
|
-
Typedefs used by various components
|
|
73
|
-
*/
|
|
74
|
-
|
|
75
|
-
#ifndef ROARING_TYPES_H
|
|
76
|
-
#define ROARING_TYPES_H
|
|
77
|
-
|
|
78
|
-
#include <stdbool.h>
|
|
79
|
-
#include <stdint.h>
|
|
80
|
-
|
|
81
|
-
#ifdef __cplusplus
|
|
82
|
-
extern "C" {
|
|
83
|
-
namespace roaring {
|
|
84
|
-
namespace api {
|
|
85
|
-
#endif
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* When building .c files as C++, there's added compile-time checking if the
|
|
89
|
-
* container types are derived from a `container_t` base class. So long as
|
|
90
|
-
* such a base class is empty, the struct will behave compatibly with C structs
|
|
91
|
-
* despite the derivation. This is due to the Empty Base Class Optimization:
|
|
92
|
-
*
|
|
93
|
-
* https://en.cppreference.com/w/cpp/language/ebo
|
|
94
|
-
*
|
|
95
|
-
* But since C isn't namespaced, taking `container_t` globally might collide
|
|
96
|
-
* with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal
|
|
97
|
-
* code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
|
|
98
|
-
*/
|
|
99
|
-
#if defined(__cplusplus)
|
|
100
|
-
extern "C++" {
|
|
101
|
-
struct container_s {};
|
|
102
|
-
}
|
|
103
|
-
#define ROARING_CONTAINER_T ::roaring::api::container_s
|
|
104
|
-
#else
|
|
105
|
-
#define ROARING_CONTAINER_T void // no compile-time checking
|
|
106
|
-
#endif
|
|
107
|
-
|
|
108
|
-
#define ROARING_FLAG_COW UINT8_C(0x1)
|
|
109
|
-
#define ROARING_FLAG_FROZEN UINT8_C(0x2)
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Roaring arrays are array-based key-value pairs having containers as values
|
|
113
|
-
* and 16-bit integer keys. A roaring bitmap might be implemented as such.
|
|
114
|
-
*/
|
|
115
|
-
|
|
116
|
-
// parallel arrays. Element sizes quite different.
|
|
117
|
-
// Alternative is array
|
|
118
|
-
// of structs. Which would have better
|
|
119
|
-
// cache performance through binary searches?
|
|
120
|
-
|
|
121
|
-
typedef struct roaring_array_s {
|
|
122
|
-
int32_t size;
|
|
123
|
-
int32_t allocation_size;
|
|
124
|
-
ROARING_CONTAINER_T **containers; // Use container_t in non-API files!
|
|
125
|
-
uint16_t *keys;
|
|
126
|
-
uint8_t *typecodes;
|
|
127
|
-
uint8_t flags;
|
|
128
|
-
} roaring_array_t;
|
|
129
|
-
|
|
130
|
-
typedef bool (*roaring_iterator)(uint32_t value, void *param);
|
|
131
|
-
typedef bool (*roaring_iterator64)(uint64_t value, void *param);
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* (For advanced users.)
|
|
135
|
-
* The roaring_statistics_t can be used to collect detailed statistics about
|
|
136
|
-
* the composition of a roaring bitmap.
|
|
137
|
-
*/
|
|
138
|
-
typedef struct roaring_statistics_s {
|
|
139
|
-
uint32_t n_containers; /* number of containers */
|
|
140
|
-
|
|
141
|
-
uint32_t n_array_containers; /* number of array containers */
|
|
142
|
-
uint32_t n_run_containers; /* number of run containers */
|
|
143
|
-
uint32_t n_bitset_containers; /* number of bitmap containers */
|
|
144
|
-
|
|
145
|
-
uint32_t
|
|
146
|
-
n_values_array_containers; /* number of values in array containers */
|
|
147
|
-
uint32_t n_values_run_containers; /* number of values in run containers */
|
|
148
|
-
uint32_t
|
|
149
|
-
n_values_bitset_containers; /* number of values in bitmap containers */
|
|
150
|
-
|
|
151
|
-
uint32_t n_bytes_array_containers; /* number of allocated bytes in array
|
|
152
|
-
containers */
|
|
153
|
-
uint32_t n_bytes_run_containers; /* number of allocated bytes in run
|
|
154
|
-
containers */
|
|
155
|
-
uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap
|
|
156
|
-
containers */
|
|
157
|
-
|
|
158
|
-
uint32_t
|
|
159
|
-
max_value; /* the maximal value, undefined if cardinality is zero */
|
|
160
|
-
uint32_t
|
|
161
|
-
min_value; /* the minimal value, undefined if cardinality is zero */
|
|
162
|
-
uint64_t sum_value; /* deprecated always zero */
|
|
163
|
-
|
|
164
|
-
uint64_t cardinality; /* total number of values stored in the bitmap */
|
|
165
|
-
|
|
166
|
-
// and n_values_arrays, n_values_rle, n_values_bitmap
|
|
167
|
-
} roaring_statistics_t;
|
|
168
|
-
|
|
169
|
-
/**
|
|
170
|
-
* (For advanced users.)
|
|
171
|
-
* The roaring64_statistics_t can be used to collect detailed statistics about
|
|
172
|
-
* the composition of a roaring64 bitmap.
|
|
173
|
-
*/
|
|
174
|
-
typedef struct roaring64_statistics_s {
|
|
175
|
-
uint64_t n_containers; /* number of containers */
|
|
176
|
-
|
|
177
|
-
uint64_t n_array_containers; /* number of array containers */
|
|
178
|
-
uint64_t n_run_containers; /* number of run containers */
|
|
179
|
-
uint64_t n_bitset_containers; /* number of bitmap containers */
|
|
180
|
-
|
|
181
|
-
uint64_t
|
|
182
|
-
n_values_array_containers; /* number of values in array containers */
|
|
183
|
-
uint64_t n_values_run_containers; /* number of values in run containers */
|
|
184
|
-
uint64_t
|
|
185
|
-
n_values_bitset_containers; /* number of values in bitmap containers */
|
|
186
|
-
|
|
187
|
-
uint64_t n_bytes_array_containers; /* number of allocated bytes in array
|
|
188
|
-
containers */
|
|
189
|
-
uint64_t n_bytes_run_containers; /* number of allocated bytes in run
|
|
190
|
-
containers */
|
|
191
|
-
uint64_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap
|
|
192
|
-
containers */
|
|
193
|
-
|
|
194
|
-
uint64_t
|
|
195
|
-
max_value; /* the maximal value, undefined if cardinality is zero */
|
|
196
|
-
uint64_t
|
|
197
|
-
min_value; /* the minimal value, undefined if cardinality is zero */
|
|
198
|
-
|
|
199
|
-
uint64_t cardinality; /* total number of values stored in the bitmap */
|
|
200
|
-
|
|
201
|
-
// and n_values_arrays, n_values_rle, n_values_bitmap
|
|
202
|
-
} roaring64_statistics_t;
|
|
203
|
-
|
|
204
|
-
/**
|
|
205
|
-
* Roaring-internal type used to iterate within a roaring container.
|
|
206
|
-
*/
|
|
207
|
-
typedef struct roaring_container_iterator_s {
|
|
208
|
-
// For bitset and array containers this is the index of the bit / entry.
|
|
209
|
-
// For run containers this points at the run.
|
|
210
|
-
int32_t index;
|
|
211
|
-
} roaring_container_iterator_t;
|
|
212
|
-
|
|
213
|
-
#ifdef __cplusplus
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
} // extern "C" { namespace roaring { namespace api {
|
|
217
|
-
#endif
|
|
218
|
-
|
|
219
|
-
#endif /* ROARING_TYPES_H */
|
|
220
|
-
/* end file include/roaring/roaring_types.h */
|
|
221
70
|
/* begin file include/roaring/portability.h */
|
|
222
71
|
/*
|
|
223
72
|
* portability.h
|
|
@@ -806,10 +655,22 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
|
|
|
806
655
|
|
|
807
656
|
#if defined(__GNUC__) || defined(__clang__)
|
|
808
657
|
#define CROARING_DEPRECATED __attribute__((deprecated))
|
|
658
|
+
#elif defined(_MSC_VER)
|
|
659
|
+
#define CROARING_DEPRECATED __declspec(deprecated)
|
|
809
660
|
#else
|
|
810
661
|
#define CROARING_DEPRECATED
|
|
811
662
|
#endif // defined(__GNUC__) || defined(__clang__)
|
|
812
663
|
|
|
664
|
+
// We want to initialize structs to zero portably (C and C++), without
|
|
665
|
+
// warnings. We can do mystruct s = CROARING_ZERO_INITIALIZER;
|
|
666
|
+
#if __cplusplus
|
|
667
|
+
#define CROARING_ZERO_INITIALIZER \
|
|
668
|
+
{}
|
|
669
|
+
#else
|
|
670
|
+
#define CROARING_ZERO_INITIALIZER \
|
|
671
|
+
{ 0 }
|
|
672
|
+
#endif
|
|
673
|
+
|
|
813
674
|
// We need portability.h to be included first,
|
|
814
675
|
// but we also always want isadetection.h to be
|
|
815
676
|
// included (right after).
|
|
@@ -819,6 +680,160 @@ static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
|
|
|
819
680
|
// strict requirement.
|
|
820
681
|
#endif /* INCLUDE_PORTABILITY_H_ */
|
|
821
682
|
/* end file include/roaring/portability.h */
|
|
683
|
+
/* begin file include/roaring/roaring_types.h */
|
|
684
|
+
/*
|
|
685
|
+
Typedefs used by various components
|
|
686
|
+
*/
|
|
687
|
+
|
|
688
|
+
#ifndef ROARING_TYPES_H
|
|
689
|
+
#define ROARING_TYPES_H
|
|
690
|
+
|
|
691
|
+
#include <stdbool.h>
|
|
692
|
+
#include <stdint.h>
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
#ifdef __cplusplus
|
|
696
|
+
extern "C" {
|
|
697
|
+
namespace roaring {
|
|
698
|
+
namespace api {
|
|
699
|
+
#endif
|
|
700
|
+
|
|
701
|
+
/**
|
|
702
|
+
* When building .c files as C++, there's added compile-time checking if the
|
|
703
|
+
* container types are derived from a `container_t` base class. So long as
|
|
704
|
+
* such a base class is empty, the struct will behave compatibly with C structs
|
|
705
|
+
* despite the derivation. This is due to the Empty Base Class Optimization:
|
|
706
|
+
*
|
|
707
|
+
* https://en.cppreference.com/w/cpp/language/ebo
|
|
708
|
+
*
|
|
709
|
+
* But since C isn't namespaced, taking `container_t` globally might collide
|
|
710
|
+
* with other projects. So roaring.h uses ROARING_CONTAINER_T, while internal
|
|
711
|
+
* code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;`
|
|
712
|
+
*/
|
|
713
|
+
#if defined(__cplusplus)
|
|
714
|
+
extern "C++" {
|
|
715
|
+
struct container_s {};
|
|
716
|
+
}
|
|
717
|
+
#define ROARING_CONTAINER_T ::roaring::api::container_s
|
|
718
|
+
#else
|
|
719
|
+
#define ROARING_CONTAINER_T void // no compile-time checking
|
|
720
|
+
#endif
|
|
721
|
+
|
|
722
|
+
#define ROARING_FLAG_COW UINT8_C(0x1)
|
|
723
|
+
#define ROARING_FLAG_FROZEN UINT8_C(0x2)
|
|
724
|
+
|
|
725
|
+
/**
|
|
726
|
+
* Roaring arrays are array-based key-value pairs having containers as values
|
|
727
|
+
* and 16-bit integer keys. A roaring bitmap might be implemented as such.
|
|
728
|
+
*/
|
|
729
|
+
|
|
730
|
+
// parallel arrays. Element sizes quite different.
|
|
731
|
+
// Alternative is array
|
|
732
|
+
// of structs. Which would have better
|
|
733
|
+
// cache performance through binary searches?
|
|
734
|
+
|
|
735
|
+
typedef struct roaring_array_s {
|
|
736
|
+
int32_t size;
|
|
737
|
+
int32_t allocation_size;
|
|
738
|
+
ROARING_CONTAINER_T **containers; // Use container_t in non-API files!
|
|
739
|
+
uint16_t *keys;
|
|
740
|
+
uint8_t *typecodes;
|
|
741
|
+
uint8_t flags;
|
|
742
|
+
} roaring_array_t;
|
|
743
|
+
|
|
744
|
+
typedef bool (*roaring_iterator)(uint32_t value, void *param);
|
|
745
|
+
typedef bool (*roaring_iterator64)(uint64_t value, void *param);
|
|
746
|
+
|
|
747
|
+
/**
|
|
748
|
+
* (For advanced users.)
|
|
749
|
+
* The roaring_statistics_t can be used to collect detailed statistics about
|
|
750
|
+
* the composition of a roaring bitmap.
|
|
751
|
+
*/
|
|
752
|
+
typedef struct roaring_statistics_s {
|
|
753
|
+
uint32_t n_containers; /* number of containers */
|
|
754
|
+
|
|
755
|
+
uint32_t n_array_containers; /* number of array containers */
|
|
756
|
+
uint32_t n_run_containers; /* number of run containers */
|
|
757
|
+
uint32_t n_bitset_containers; /* number of bitmap containers */
|
|
758
|
+
|
|
759
|
+
uint32_t
|
|
760
|
+
n_values_array_containers; /* number of values in array containers */
|
|
761
|
+
uint32_t n_values_run_containers; /* number of values in run containers */
|
|
762
|
+
uint32_t
|
|
763
|
+
n_values_bitset_containers; /* number of values in bitmap containers */
|
|
764
|
+
|
|
765
|
+
uint32_t n_bytes_array_containers; /* number of allocated bytes in array
|
|
766
|
+
containers */
|
|
767
|
+
uint32_t n_bytes_run_containers; /* number of allocated bytes in run
|
|
768
|
+
containers */
|
|
769
|
+
uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap
|
|
770
|
+
containers */
|
|
771
|
+
|
|
772
|
+
uint32_t
|
|
773
|
+
max_value; /* the maximal value, undefined if cardinality is zero */
|
|
774
|
+
uint32_t
|
|
775
|
+
min_value; /* the minimal value, undefined if cardinality is zero */
|
|
776
|
+
|
|
777
|
+
CROARING_DEPRECATED
|
|
778
|
+
uint64_t sum_value; /* deprecated always zero */
|
|
779
|
+
|
|
780
|
+
uint64_t cardinality; /* total number of values stored in the bitmap */
|
|
781
|
+
|
|
782
|
+
// and n_values_arrays, n_values_rle, n_values_bitmap
|
|
783
|
+
} roaring_statistics_t;
|
|
784
|
+
|
|
785
|
+
/**
|
|
786
|
+
* (For advanced users.)
|
|
787
|
+
* The roaring64_statistics_t can be used to collect detailed statistics about
|
|
788
|
+
* the composition of a roaring64 bitmap.
|
|
789
|
+
*/
|
|
790
|
+
typedef struct roaring64_statistics_s {
|
|
791
|
+
uint64_t n_containers; /* number of containers */
|
|
792
|
+
|
|
793
|
+
uint64_t n_array_containers; /* number of array containers */
|
|
794
|
+
uint64_t n_run_containers; /* number of run containers */
|
|
795
|
+
uint64_t n_bitset_containers; /* number of bitmap containers */
|
|
796
|
+
|
|
797
|
+
uint64_t
|
|
798
|
+
n_values_array_containers; /* number of values in array containers */
|
|
799
|
+
uint64_t n_values_run_containers; /* number of values in run containers */
|
|
800
|
+
uint64_t
|
|
801
|
+
n_values_bitset_containers; /* number of values in bitmap containers */
|
|
802
|
+
|
|
803
|
+
uint64_t n_bytes_array_containers; /* number of allocated bytes in array
|
|
804
|
+
containers */
|
|
805
|
+
uint64_t n_bytes_run_containers; /* number of allocated bytes in run
|
|
806
|
+
containers */
|
|
807
|
+
uint64_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap
|
|
808
|
+
containers */
|
|
809
|
+
|
|
810
|
+
uint64_t
|
|
811
|
+
max_value; /* the maximal value, undefined if cardinality is zero */
|
|
812
|
+
uint64_t
|
|
813
|
+
min_value; /* the minimal value, undefined if cardinality is zero */
|
|
814
|
+
|
|
815
|
+
uint64_t cardinality; /* total number of values stored in the bitmap */
|
|
816
|
+
|
|
817
|
+
// and n_values_arrays, n_values_rle, n_values_bitmap
|
|
818
|
+
} roaring64_statistics_t;
|
|
819
|
+
|
|
820
|
+
/**
|
|
821
|
+
* Roaring-internal type used to iterate within a roaring container.
|
|
822
|
+
*/
|
|
823
|
+
typedef struct roaring_container_iterator_s {
|
|
824
|
+
// For bitset and array containers this is the index of the bit / entry.
|
|
825
|
+
// For run containers this points at the run.
|
|
826
|
+
int32_t index;
|
|
827
|
+
} roaring_container_iterator_t;
|
|
828
|
+
|
|
829
|
+
#ifdef __cplusplus
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
} // extern "C" { namespace roaring { namespace api {
|
|
833
|
+
#endif
|
|
834
|
+
|
|
835
|
+
#endif /* ROARING_TYPES_H */
|
|
836
|
+
/* end file include/roaring/roaring_types.h */
|
|
822
837
|
/* begin file include/roaring/bitset/bitset.h */
|
|
823
838
|
#ifndef CROARING_CBITSET_BITSET_H
|
|
824
839
|
#define CROARING_CBITSET_BITSET_H
|
|
@@ -1664,6 +1679,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
|
|
|
1664
1679
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
1665
1680
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
1666
1681
|
* compatible with little-endian systems.
|
|
1682
|
+
*
|
|
1683
|
+
* When serializing data to a file, we recommend that you also use
|
|
1684
|
+
* checksums so that, at deserialization, you can be confident
|
|
1685
|
+
* that you are recovering the correct data.
|
|
1667
1686
|
*/
|
|
1668
1687
|
size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
|
|
1669
1688
|
|
|
@@ -1727,7 +1746,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
|
|
|
1727
1746
|
* https://github.com/RoaringBitmap/RoaringFormatSpec
|
|
1728
1747
|
*
|
|
1729
1748
|
* The function itself is safe in the sense that it will not cause buffer
|
|
1730
|
-
* overflows
|
|
1749
|
+
* overflows: it will not read beyond the scope of the provided buffer
|
|
1750
|
+
* (buf,maxbytes).
|
|
1751
|
+
*
|
|
1752
|
+
* However, for correct operations, it is assumed that the bitmap
|
|
1731
1753
|
* read was once serialized from a valid bitmap (i.e., it follows the format
|
|
1732
1754
|
* specification). If you provided an incorrect input (garbage), then the bitmap
|
|
1733
1755
|
* read may not be in a valid state and following operations may not lead to
|
|
@@ -1737,8 +1759,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
|
|
|
1737
1759
|
* but not for random inputs.
|
|
1738
1760
|
*
|
|
1739
1761
|
* You may use roaring_bitmap_internal_validate to check the validity of the
|
|
1740
|
-
* bitmap prior to using it.
|
|
1741
|
-
*
|
|
1762
|
+
* bitmap prior to using it.
|
|
1763
|
+
*
|
|
1764
|
+
* We recommend that you use checksums to check that serialized data corresponds
|
|
1765
|
+
* to a serialized bitmap.
|
|
1742
1766
|
*
|
|
1743
1767
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
1744
1768
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
@@ -1800,6 +1824,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
|
|
|
1800
1824
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
1801
1825
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
1802
1826
|
* compatible with little-endian systems.
|
|
1827
|
+
*
|
|
1828
|
+
* When serializing data to a file, we recommend that you also use
|
|
1829
|
+
* checksums so that, at deserialization, you can be confident
|
|
1830
|
+
* that you are recovering the correct data.
|
|
1803
1831
|
*/
|
|
1804
1832
|
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
|
|
1805
1833
|
|
|
@@ -1834,6 +1862,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
|
|
|
1834
1862
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
1835
1863
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
1836
1864
|
* compatible with little-endian systems.
|
|
1865
|
+
*
|
|
1866
|
+
* When serializing data to a file, we recommend that you also use
|
|
1867
|
+
* checksums so that, at deserialization, you can be confident
|
|
1868
|
+
* that you are recovering the correct data.
|
|
1837
1869
|
*/
|
|
1838
1870
|
void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
|
|
1839
1871
|
|
|
@@ -2247,12 +2279,12 @@ using namespace ::roaring::api;
|
|
|
2247
2279
|
#ifndef INCLUDE_ROARING_MEMORY_H_
|
|
2248
2280
|
#define INCLUDE_ROARING_MEMORY_H_
|
|
2249
2281
|
|
|
2282
|
+
#include <stddef.h> // for size_t
|
|
2283
|
+
|
|
2250
2284
|
#ifdef __cplusplus
|
|
2251
2285
|
extern "C" {
|
|
2252
2286
|
#endif
|
|
2253
2287
|
|
|
2254
|
-
#include <stddef.h> // for size_t
|
|
2255
|
-
|
|
2256
2288
|
typedef void* (*roaring_malloc_p)(size_t);
|
|
2257
2289
|
typedef void* (*roaring_realloc_p)(void*, size_t);
|
|
2258
2290
|
typedef void* (*roaring_calloc_p)(size_t, size_t);
|
|
@@ -2376,6 +2408,14 @@ roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args,
|
|
|
2376
2408
|
&((const uint64_t[]){0, __VA_ARGS__})[1])
|
|
2377
2409
|
#endif
|
|
2378
2410
|
|
|
2411
|
+
/**
|
|
2412
|
+
* Create a new bitmap by moving containers from a 32 bit roaring bitmap.
|
|
2413
|
+
*
|
|
2414
|
+
* After calling this function, the original bitmap will be empty, and the
|
|
2415
|
+
* returned bitmap will contain all the values from the original bitmap.
|
|
2416
|
+
*/
|
|
2417
|
+
roaring64_bitmap_t *roaring64_bitmap_move_from_roaring32(roaring_bitmap_t *r);
|
|
2418
|
+
|
|
2379
2419
|
/**
|
|
2380
2420
|
* Create a new bitmap containing all the values in [min, max) that are at a
|
|
2381
2421
|
* distance k*step from min.
|
|
@@ -2486,6 +2526,11 @@ void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min,
|
|
|
2486
2526
|
void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min,
|
|
2487
2527
|
uint64_t max);
|
|
2488
2528
|
|
|
2529
|
+
/**
|
|
2530
|
+
* Empties the bitmap.
|
|
2531
|
+
*/
|
|
2532
|
+
void roaring64_bitmap_clear(roaring64_bitmap_t *r);
|
|
2533
|
+
|
|
2489
2534
|
/**
|
|
2490
2535
|
* Returns true if the provided value is present.
|
|
2491
2536
|
*/
|
|
@@ -2556,6 +2601,12 @@ uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r);
|
|
|
2556
2601
|
uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r,
|
|
2557
2602
|
uint64_t min, uint64_t max);
|
|
2558
2603
|
|
|
2604
|
+
/**
|
|
2605
|
+
* Returns the number of elements in the range [min, max]
|
|
2606
|
+
*/
|
|
2607
|
+
uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r,
|
|
2608
|
+
uint64_t min, uint64_t max);
|
|
2609
|
+
|
|
2559
2610
|
/**
|
|
2560
2611
|
* Returns true if the bitmap is empty (cardinality is zero).
|
|
2561
2612
|
*/
|
|
@@ -2775,6 +2826,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
|
|
|
2775
2826
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
2776
2827
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
2777
2828
|
* compatible with little-endian systems.
|
|
2829
|
+
*
|
|
2830
|
+
* When serializing data to a file, we recommend that you also use
|
|
2831
|
+
* checksums so that, at deserialization, you can be confident
|
|
2832
|
+
* that you are recovering the correct data.
|
|
2778
2833
|
*/
|
|
2779
2834
|
size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
|
|
2780
2835
|
char *buf);
|
|
@@ -2789,14 +2844,17 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
|
|
|
2789
2844
|
size_t maxbytes);
|
|
2790
2845
|
|
|
2791
2846
|
/**
|
|
2792
|
-
* Read a bitmap from a serialized buffer
|
|
2847
|
+
* Read a bitmap from a serialized buffer (reading up to maxbytes).
|
|
2793
2848
|
* In case of failure, NULL is returned.
|
|
2794
2849
|
*
|
|
2795
2850
|
* This is meant to be compatible with other languages
|
|
2796
2851
|
* https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
|
|
2797
2852
|
*
|
|
2798
2853
|
* The function itself is safe in the sense that it will not cause buffer
|
|
2799
|
-
* overflows
|
|
2854
|
+
* overflows: it will not read beyond the scope of the provided buffer
|
|
2855
|
+
* (buf,maxbytes).
|
|
2856
|
+
*
|
|
2857
|
+
* However, for correct operations, it is assumed that the bitmap
|
|
2800
2858
|
* read was once serialized from a valid bitmap (i.e., it follows the format
|
|
2801
2859
|
* specification). If you provided an incorrect input (garbage), then the bitmap
|
|
2802
2860
|
* read may not be in a valid state and following operations may not lead to
|
|
@@ -2805,6 +2863,12 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
|
|
|
2805
2863
|
* order. This is guaranteed to happen when serializing an existing bitmap,
|
|
2806
2864
|
* but not for random inputs.
|
|
2807
2865
|
*
|
|
2866
|
+
* You may use roaring64_bitmap_internal_validate to check the validity of the
|
|
2867
|
+
* bitmap prior to using it.
|
|
2868
|
+
*
|
|
2869
|
+
* We recommend that you use checksums to check that serialized data corresponds
|
|
2870
|
+
* to a serialized bitmap.
|
|
2871
|
+
*
|
|
2808
2872
|
* This function is endian-sensitive. If you have a big-endian system (e.g., a
|
|
2809
2873
|
* mainframe IBM s390x), the data format is going to be big-endian and not
|
|
2810
2874
|
* compatible with little-endian systems.
|
data/lib/roaring/version.rb
CHANGED
data/lib/roaring.rb
CHANGED
|
@@ -8,37 +8,67 @@ module Roaring
|
|
|
8
8
|
class Error < StandardError; end
|
|
9
9
|
|
|
10
10
|
module BitmapCommon
|
|
11
|
-
def self.included(base)
|
|
12
|
-
super
|
|
13
|
-
|
|
14
|
-
base.extend ClassMethods
|
|
15
|
-
|
|
16
|
-
base.alias_method :size, :cardinality
|
|
17
|
-
base.alias_method :length, :cardinality
|
|
18
|
-
base.alias_method :count, :cardinality
|
|
19
|
-
|
|
20
|
-
base.alias_method :+, :|
|
|
21
|
-
base.alias_method :union, :|
|
|
22
|
-
base.alias_method :intersection, :&
|
|
23
|
-
base.alias_method :difference, :-
|
|
24
|
-
|
|
25
|
-
base.alias_method :delete, :remove
|
|
26
|
-
base.alias_method :delete?, :remove?
|
|
27
|
-
|
|
28
|
-
base.alias_method :first, :min
|
|
29
|
-
base.alias_method :last, :max
|
|
30
|
-
|
|
31
|
-
base.alias_method :eql?, :==
|
|
32
|
-
|
|
33
|
-
base.alias_method :===, :include?
|
|
34
|
-
|
|
35
|
-
base.alias_method :subset?, :<=
|
|
36
|
-
base.alias_method :proper_subset?, :<
|
|
37
|
-
base.alias_method :superset?, :>=
|
|
38
|
-
base.alias_method :proper_superset?, :>
|
|
39
|
-
end
|
|
40
|
-
|
|
41
11
|
module ClassMethods
|
|
12
|
+
# @private
|
|
13
|
+
# @!macro [attach] property
|
|
14
|
+
# @!parse alias_method :<<, :add
|
|
15
|
+
#
|
|
16
|
+
# @!parse alias_method :size, :cardinality
|
|
17
|
+
# @!parse alias_method :length, :cardinality
|
|
18
|
+
# @!parse alias_method :count, :cardinality
|
|
19
|
+
#
|
|
20
|
+
# @!parse alias_method :&, :and
|
|
21
|
+
# @!parse alias_method :|, :or
|
|
22
|
+
# @!parse alias_method :^, :xor
|
|
23
|
+
# @!parse alias_method :-, :andnot
|
|
24
|
+
# @!parse alias_method :+, :or
|
|
25
|
+
# @!parse alias_method :union, :or
|
|
26
|
+
# @!parse alias_method :intersection, :and
|
|
27
|
+
# @!parse alias_method :difference, :andnot
|
|
28
|
+
#
|
|
29
|
+
# @!parse alias_method :delete, :remove
|
|
30
|
+
# @!parse alias_method :delete?, :remove?
|
|
31
|
+
#
|
|
32
|
+
# @!parse alias_method :first, :min
|
|
33
|
+
# @!parse alias_method :last, :max
|
|
34
|
+
#
|
|
35
|
+
# @!parse alias_method :eql?, :==
|
|
36
|
+
#
|
|
37
|
+
# @!parse alias_method :===, :include?
|
|
38
|
+
#
|
|
39
|
+
# @!parse alias_method :subset?, :<=
|
|
40
|
+
# @!parse alias_method :proper_subset?, :<
|
|
41
|
+
def define_roaring_aliases!
|
|
42
|
+
alias_method :<<, :add
|
|
43
|
+
|
|
44
|
+
alias_method :size, :cardinality
|
|
45
|
+
alias_method :length, :cardinality
|
|
46
|
+
alias_method :count, :cardinality
|
|
47
|
+
|
|
48
|
+
alias_method :&, :and
|
|
49
|
+
alias_method :|, :or
|
|
50
|
+
alias_method :^, :xor
|
|
51
|
+
alias_method :-, :andnot
|
|
52
|
+
alias_method :+, :or
|
|
53
|
+
alias_method :union, :or
|
|
54
|
+
alias_method :intersection, :and
|
|
55
|
+
alias_method :difference, :andnot
|
|
56
|
+
|
|
57
|
+
alias_method :delete, :remove
|
|
58
|
+
alias_method :delete?, :remove?
|
|
59
|
+
|
|
60
|
+
alias_method :first, :min
|
|
61
|
+
alias_method :last, :max
|
|
62
|
+
|
|
63
|
+
alias_method :eql?, :==
|
|
64
|
+
|
|
65
|
+
alias_method :===, :include?
|
|
66
|
+
|
|
67
|
+
alias_method :subset?, :<=
|
|
68
|
+
alias_method :proper_subset?, :<
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Convenience method for building a bitmap
|
|
42
72
|
def [](*args)
|
|
43
73
|
if args.size == 0
|
|
44
74
|
new
|
|
@@ -61,6 +91,12 @@ module Roaring
|
|
|
61
91
|
|
|
62
92
|
if enum.instance_of?(self.class)
|
|
63
93
|
replace(enum)
|
|
94
|
+
elsif Range === enum
|
|
95
|
+
if enum.exclude_end?
|
|
96
|
+
add_range(enum.begin, enum.end)
|
|
97
|
+
else
|
|
98
|
+
add_range_closed(enum.begin, enum.end)
|
|
99
|
+
end
|
|
64
100
|
else
|
|
65
101
|
enum.each { |x| self << x }
|
|
66
102
|
end
|
|
@@ -74,14 +110,27 @@ module Roaring
|
|
|
74
110
|
replace(other)
|
|
75
111
|
end
|
|
76
112
|
|
|
77
|
-
|
|
113
|
+
# Check if `self` is a superset of `other`. A superset requires that `self` contain all of `other`'s elements. They may be equal.
|
|
114
|
+
# @return [Boolean] `true` if `self` is a superset of `other`, otherwise `false`
|
|
115
|
+
def superset?(other)
|
|
116
|
+
other <= self
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
# Check if `self` is a strict superset of `other`. A strict superset requires that `self` contain all of `other`'s elements, but that they aren't exactly equal.
|
|
120
|
+
# @return [Boolean] `true` if `self` is a strict superset of `other`, otherwise `false`
|
|
121
|
+
def proper_superset?(other)
|
|
78
122
|
other < self
|
|
79
123
|
end
|
|
80
124
|
|
|
81
|
-
|
|
82
|
-
|
|
125
|
+
alias_method :>=, :superset?
|
|
126
|
+
alias_method :>, :proper_superset?
|
|
127
|
+
|
|
128
|
+
def add_range(min, max)
|
|
129
|
+
return if max <= min
|
|
130
|
+
add_range_closed(min, max - 1)
|
|
83
131
|
end
|
|
84
132
|
|
|
133
|
+
# @return [Integer, nil] 0 if the bitmaps are equal, -1 / +1 if the set is a subset / superset of the given set, or nil if they both have unique elements.
|
|
85
134
|
def <=>(other)
|
|
86
135
|
if self == other
|
|
87
136
|
0
|
|
@@ -102,14 +151,15 @@ module Roaring
|
|
|
102
151
|
serialize
|
|
103
152
|
end
|
|
104
153
|
|
|
105
|
-
def to_a
|
|
106
|
-
map(&:itself)
|
|
107
|
-
end
|
|
108
|
-
|
|
109
154
|
def to_set
|
|
110
|
-
::Set.new(
|
|
155
|
+
::Set.new(self)
|
|
111
156
|
end
|
|
112
157
|
|
|
158
|
+
# @example Small bitmap
|
|
159
|
+
# Roaring::Bitmap32[1,2,3].inspect #=> "#<Roaring::Bitmap32 {1, 2, 3}>"
|
|
160
|
+
# @example Large bitmap
|
|
161
|
+
# Roaring::Bitmap32[1..1000].inspect #=> "#<Roaring::Bitmap32 (1000 values)>"
|
|
162
|
+
# @return [String] a programmer-readable representation of the bitmap
|
|
113
163
|
def inspect
|
|
114
164
|
cardinality = self.cardinality
|
|
115
165
|
if cardinality < 64
|
|
@@ -122,6 +172,9 @@ module Roaring
|
|
|
122
172
|
|
|
123
173
|
class Bitmap32
|
|
124
174
|
include BitmapCommon
|
|
175
|
+
extend BitmapCommon::ClassMethods
|
|
176
|
+
|
|
177
|
+
define_roaring_aliases!
|
|
125
178
|
|
|
126
179
|
MIN = 0
|
|
127
180
|
MAX = (2**32) - 1
|
|
@@ -130,6 +183,9 @@ module Roaring
|
|
|
130
183
|
|
|
131
184
|
class Bitmap64
|
|
132
185
|
include BitmapCommon
|
|
186
|
+
extend BitmapCommon::ClassMethods
|
|
187
|
+
|
|
188
|
+
define_roaring_aliases!
|
|
133
189
|
|
|
134
190
|
MIN = 0
|
|
135
191
|
MAX = (2**64) - 1
|