exonware-xwnode 0.0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +14 -0
- exonware/xwnode/__init__.py +127 -0
- exonware/xwnode/base.py +676 -0
- exonware/xwnode/config.py +178 -0
- exonware/xwnode/contracts.py +730 -0
- exonware/xwnode/errors.py +503 -0
- exonware/xwnode/facade.py +460 -0
- exonware/xwnode/strategies/__init__.py +158 -0
- exonware/xwnode/strategies/advisor.py +463 -0
- exonware/xwnode/strategies/edges/__init__.py +32 -0
- exonware/xwnode/strategies/edges/adj_list.py +227 -0
- exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
- exonware/xwnode/strategies/edges/base.py +169 -0
- exonware/xwnode/strategies/flyweight.py +328 -0
- exonware/xwnode/strategies/impls/__init__.py +13 -0
- exonware/xwnode/strategies/impls/_base_edge.py +403 -0
- exonware/xwnode/strategies/impls/_base_node.py +307 -0
- exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
- exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
- exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
- exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
- exonware/xwnode/strategies/impls/edge_coo.py +533 -0
- exonware/xwnode/strategies/impls/edge_csc.py +447 -0
- exonware/xwnode/strategies/impls/edge_csr.py +492 -0
- exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
- exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
- exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
- exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
- exonware/xwnode/strategies/impls/edge_octree.py +574 -0
- exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
- exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
- exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
- exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
- exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
- exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
- exonware/xwnode/strategies/manager.py +775 -0
- exonware/xwnode/strategies/metrics.py +538 -0
- exonware/xwnode/strategies/migration.py +432 -0
- exonware/xwnode/strategies/nodes/__init__.py +50 -0
- exonware/xwnode/strategies/nodes/_base_node.py +307 -0
- exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
- exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
- exonware/xwnode/strategies/nodes/array_list.py +209 -0
- exonware/xwnode/strategies/nodes/base.py +247 -0
- exonware/xwnode/strategies/nodes/deque.py +200 -0
- exonware/xwnode/strategies/nodes/hash_map.py +135 -0
- exonware/xwnode/strategies/nodes/heap.py +307 -0
- exonware/xwnode/strategies/nodes/linked_list.py +232 -0
- exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
- exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
- exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
- exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
- exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
- exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
- exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
- exonware/xwnode/strategies/nodes/node_btree.py +357 -0
- exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
- exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
- exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
- exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
- exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
- exonware/xwnode/strategies/nodes/node_heap.py +191 -0
- exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
- exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
- exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
- exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
- exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
- exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
- exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
- exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
- exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
- exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
- exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
- exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
- exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
- exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
- exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
- exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
- exonware/xwnode/strategies/nodes/node_treap.py +387 -0
- exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
- exonware/xwnode/strategies/nodes/node_trie.py +252 -0
- exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
- exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
- exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
- exonware/xwnode/strategies/nodes/queue.py +161 -0
- exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
- exonware/xwnode/strategies/nodes/stack.py +152 -0
- exonware/xwnode/strategies/nodes/trie.py +274 -0
- exonware/xwnode/strategies/nodes/union_find.py +283 -0
- exonware/xwnode/strategies/pattern_detector.py +603 -0
- exonware/xwnode/strategies/performance_monitor.py +487 -0
- exonware/xwnode/strategies/queries/__init__.py +24 -0
- exonware/xwnode/strategies/queries/base.py +236 -0
- exonware/xwnode/strategies/queries/cql.py +201 -0
- exonware/xwnode/strategies/queries/cypher.py +181 -0
- exonware/xwnode/strategies/queries/datalog.py +70 -0
- exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
- exonware/xwnode/strategies/queries/eql.py +70 -0
- exonware/xwnode/strategies/queries/flux.py +70 -0
- exonware/xwnode/strategies/queries/gql.py +70 -0
- exonware/xwnode/strategies/queries/graphql.py +240 -0
- exonware/xwnode/strategies/queries/gremlin.py +181 -0
- exonware/xwnode/strategies/queries/hiveql.py +214 -0
- exonware/xwnode/strategies/queries/hql.py +70 -0
- exonware/xwnode/strategies/queries/jmespath.py +219 -0
- exonware/xwnode/strategies/queries/jq.py +66 -0
- exonware/xwnode/strategies/queries/json_query.py +66 -0
- exonware/xwnode/strategies/queries/jsoniq.py +248 -0
- exonware/xwnode/strategies/queries/kql.py +70 -0
- exonware/xwnode/strategies/queries/linq.py +238 -0
- exonware/xwnode/strategies/queries/logql.py +70 -0
- exonware/xwnode/strategies/queries/mql.py +68 -0
- exonware/xwnode/strategies/queries/n1ql.py +210 -0
- exonware/xwnode/strategies/queries/partiql.py +70 -0
- exonware/xwnode/strategies/queries/pig.py +215 -0
- exonware/xwnode/strategies/queries/promql.py +70 -0
- exonware/xwnode/strategies/queries/sparql.py +220 -0
- exonware/xwnode/strategies/queries/sql.py +275 -0
- exonware/xwnode/strategies/queries/xml_query.py +66 -0
- exonware/xwnode/strategies/queries/xpath.py +223 -0
- exonware/xwnode/strategies/queries/xquery.py +258 -0
- exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
- exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
- exonware/xwnode/strategies/registry.py +604 -0
- exonware/xwnode/strategies/simple.py +273 -0
- exonware/xwnode/strategies/utils.py +532 -0
- exonware/xwnode/types.py +912 -0
- exonware/xwnode/version.py +78 -0
- exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
- exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
- exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
- exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,407 @@
|
|
1
|
+
"""
|
2
|
+
HyperLogLog Node Strategy Implementation
|
3
|
+
|
4
|
+
This module implements the HYPERLOGLOG strategy for probabilistic
|
5
|
+
cardinality estimation with logarithmic space complexity.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any, Iterator, List, Dict, Optional, Set
|
9
|
+
import hashlib
|
10
|
+
import math
|
11
|
+
from ._base_node import aNodeStrategy
|
12
|
+
from ...types import NodeMode, NodeTrait
|
13
|
+
|
14
|
+
|
15
|
+
class xHyperLogLogStrategy(aNodeStrategy):
|
16
|
+
"""
|
17
|
+
HyperLogLog node strategy for cardinality estimation.
|
18
|
+
|
19
|
+
Provides memory-efficient approximate counting of distinct elements
|
20
|
+
with configurable precision and excellent scalability.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
    """
    Set up an empty HyperLogLog sketch.

    Args:
        traits: Trait flags forwarded to the base strategy.
        **options: Strategy options, also forwarded to the base strategy.
            ``precision`` (default 12) is the number of index bits; the
            sketch allocates ``2 ** precision`` buckets.

    Raises:
        ValueError: If ``precision`` is not between 4 and 16 inclusive.
    """
    super().__init__(NodeMode.HYPERLOGLOG, traits, **options)

    # Number of hash bits used to pick a bucket (12 bits -> 4096 buckets).
    bits = options.get('precision', 12)
    self.precision = bits
    if not (4 <= bits <= 16):
        raise ValueError("Precision must be between 4 and 16")

    # Bucket count and the bias-correction constant that goes with it.
    self.num_buckets = 2 ** bits
    self.alpha = self._calculate_alpha()

    # One register per bucket: largest (leading zeros + 1) seen so far.
    self._buckets: List[int] = [0] * self.num_buckets

    # Exact bookkeeping kept next to the sketch for the key/value interface.
    self._items_added: Set[str] = set()
    self._values: Dict[str, Any] = {}
    self._size = 0

    # Number of put() calls, repeated keys included.
    self._total_additions = 0
|
45
|
+
|
46
|
+
def get_supported_traits(self) -> NodeTrait:
    """Return the trait flags this strategy supports, combined into one value."""
    supported = NodeTrait.PROBABILISTIC
    supported = supported | NodeTrait.COMPRESSED
    supported = supported | NodeTrait.STREAMING
    return supported
|
49
|
+
|
50
|
+
def _calculate_alpha(self) -> float:
|
51
|
+
"""Calculate alpha constant for bias correction."""
|
52
|
+
m = self.num_buckets
|
53
|
+
|
54
|
+
if m == 16:
|
55
|
+
return 0.673
|
56
|
+
elif m == 32:
|
57
|
+
return 0.697
|
58
|
+
elif m == 64:
|
59
|
+
return 0.709
|
60
|
+
else:
|
61
|
+
return 0.7213 / (1.0 + 1.079 / m)
|
62
|
+
|
63
|
+
def _hash_item(self, item: str) -> int:
|
64
|
+
"""Hash item to 32-bit integer."""
|
65
|
+
hash_obj = hashlib.md5(item.encode())
|
66
|
+
return int(hash_obj.hexdigest()[:8], 16)
|
67
|
+
|
68
|
+
def _leading_zeros(self, num: int, max_bits: int = 32) -> int:
|
69
|
+
"""Count leading zeros in binary representation."""
|
70
|
+
if num == 0:
|
71
|
+
return max_bits
|
72
|
+
|
73
|
+
zeros = 0
|
74
|
+
mask = 1 << (max_bits - 1)
|
75
|
+
|
76
|
+
while zeros < max_bits and (num & mask) == 0:
|
77
|
+
zeros += 1
|
78
|
+
mask >>= 1
|
79
|
+
|
80
|
+
return zeros
|
81
|
+
|
82
|
+
def _add_hash(self, hash_value: int) -> None:
|
83
|
+
"""Add hash value to HyperLogLog."""
|
84
|
+
# Extract bucket index from first b bits
|
85
|
+
bucket = hash_value & ((1 << self.precision) - 1)
|
86
|
+
|
87
|
+
# Get remaining bits for leading zero count
|
88
|
+
remaining = hash_value >> self.precision
|
89
|
+
|
90
|
+
# Count leading zeros + 1
|
91
|
+
leading_zeros = self._leading_zeros(remaining, 32 - self.precision) + 1
|
92
|
+
|
93
|
+
# Update bucket with maximum value
|
94
|
+
self._buckets[bucket] = max(self._buckets[bucket], leading_zeros)
|
95
|
+
|
96
|
+
# ============================================================================
|
97
|
+
# CORE OPERATIONS (Key-based interface for compatibility)
|
98
|
+
# ============================================================================
|
99
|
+
|
100
|
+
def put(self, key: Any, value: Any = None) -> None:
|
101
|
+
"""Add item to cardinality estimation."""
|
102
|
+
item = str(key)
|
103
|
+
|
104
|
+
# Add to HyperLogLog
|
105
|
+
hash_value = self._hash_item(item)
|
106
|
+
self._add_hash(hash_value)
|
107
|
+
|
108
|
+
# Track for compatibility
|
109
|
+
if item not in self._items_added:
|
110
|
+
self._items_added.add(item)
|
111
|
+
self._size += 1
|
112
|
+
|
113
|
+
self._values[item] = value if value is not None else True
|
114
|
+
self._total_additions += 1
|
115
|
+
|
116
|
+
def get(self, key: Any, default: Any = None) -> Any:
|
117
|
+
"""Get value or cardinality estimate."""
|
118
|
+
key_str = str(key)
|
119
|
+
|
120
|
+
if key_str == "cardinality":
|
121
|
+
return self.estimate_cardinality()
|
122
|
+
elif key_str == "buckets":
|
123
|
+
return self._buckets.copy()
|
124
|
+
elif key_str == "statistics":
|
125
|
+
return self.get_statistics()
|
126
|
+
elif key_str == "raw_estimate":
|
127
|
+
return self._raw_estimate()
|
128
|
+
elif key_str in self._values:
|
129
|
+
return self._values[key_str]
|
130
|
+
|
131
|
+
return default
|
132
|
+
|
133
|
+
def has(self, key: Any) -> bool:
|
134
|
+
"""Check if item might exist (probabilistic)."""
|
135
|
+
key_str = str(key)
|
136
|
+
|
137
|
+
if key_str in ["cardinality", "buckets", "statistics", "raw_estimate"]:
|
138
|
+
return True
|
139
|
+
|
140
|
+
return key_str in self._items_added
|
141
|
+
|
142
|
+
def remove(self, key: Any) -> bool:
|
143
|
+
"""Remove item (not supported in HyperLogLog)."""
|
144
|
+
# HyperLogLog doesn't support deletion
|
145
|
+
return False
|
146
|
+
|
147
|
+
def delete(self, key: Any) -> bool:
|
148
|
+
"""Remove item (not supported in HyperLogLog)."""
|
149
|
+
return False
|
150
|
+
|
151
|
+
def clear(self) -> None:
|
152
|
+
"""Clear all data."""
|
153
|
+
self._buckets = [0] * self.num_buckets
|
154
|
+
self._values.clear()
|
155
|
+
self._items_added.clear()
|
156
|
+
self._size = 0
|
157
|
+
self._total_additions = 0
|
158
|
+
|
159
|
+
def keys(self) -> Iterator[str]:
|
160
|
+
"""Get all tracked items."""
|
161
|
+
for item in self._items_added:
|
162
|
+
yield item
|
163
|
+
yield "cardinality"
|
164
|
+
yield "buckets"
|
165
|
+
yield "statistics"
|
166
|
+
yield "raw_estimate"
|
167
|
+
|
168
|
+
def values(self) -> Iterator[Any]:
|
169
|
+
"""Get all values."""
|
170
|
+
for item in self._items_added:
|
171
|
+
yield self._values.get(item, True)
|
172
|
+
yield self.estimate_cardinality()
|
173
|
+
yield self._buckets.copy()
|
174
|
+
yield self.get_statistics()
|
175
|
+
yield self._raw_estimate()
|
176
|
+
|
177
|
+
def items(self) -> Iterator[tuple[str, Any]]:
|
178
|
+
"""Get all item-value pairs."""
|
179
|
+
for item in self._items_added:
|
180
|
+
yield (item, self._values.get(item, True))
|
181
|
+
yield ("cardinality", self.estimate_cardinality())
|
182
|
+
yield ("buckets", self._buckets.copy())
|
183
|
+
yield ("statistics", self.get_statistics())
|
184
|
+
yield ("raw_estimate", self._raw_estimate())
|
185
|
+
|
186
|
+
def __len__(self) -> int:
|
187
|
+
"""Get number of unique items tracked."""
|
188
|
+
return self._size
|
189
|
+
|
190
|
+
def to_native(self) -> Dict[str, Any]:
|
191
|
+
"""Convert to native Python dict."""
|
192
|
+
result = {}
|
193
|
+
for item in self._items_added:
|
194
|
+
result[item] = self._values.get(item, True)
|
195
|
+
|
196
|
+
result.update({
|
197
|
+
"cardinality": self.estimate_cardinality(),
|
198
|
+
"buckets": self._buckets.copy(),
|
199
|
+
"statistics": self.get_statistics(),
|
200
|
+
"raw_estimate": self._raw_estimate()
|
201
|
+
})
|
202
|
+
|
203
|
+
return result
|
204
|
+
|
205
|
+
@property
|
206
|
+
def is_list(self) -> bool:
|
207
|
+
"""This is not a list strategy."""
|
208
|
+
return False
|
209
|
+
|
210
|
+
@property
|
211
|
+
def is_dict(self) -> bool:
|
212
|
+
"""This behaves like a dict with probabilistic semantics."""
|
213
|
+
return True
|
214
|
+
|
215
|
+
# ============================================================================
|
216
|
+
# HYPERLOGLOG SPECIFIC OPERATIONS
|
217
|
+
# ============================================================================
|
218
|
+
|
219
|
+
def add(self, item: str) -> None:
|
220
|
+
"""Add item to cardinality estimation."""
|
221
|
+
self.put(item)
|
222
|
+
|
223
|
+
def _raw_estimate(self) -> float:
|
224
|
+
"""Calculate raw cardinality estimate."""
|
225
|
+
# Raw estimate: α_m * m² / Σ(2^(-M_j))
|
226
|
+
sum_powers = sum(2.0 ** (-bucket) for bucket in self._buckets)
|
227
|
+
return self.alpha * (self.num_buckets ** 2) / sum_powers
|
228
|
+
|
229
|
+
def estimate_cardinality(self) -> int:
|
230
|
+
"""Estimate cardinality with bias correction."""
|
231
|
+
raw_estimate = self._raw_estimate()
|
232
|
+
|
233
|
+
# Apply bias correction for small/large values
|
234
|
+
if raw_estimate <= 2.5 * self.num_buckets:
|
235
|
+
# Small range correction
|
236
|
+
zeros = self._buckets.count(0)
|
237
|
+
if zeros != 0:
|
238
|
+
return int(self.num_buckets * math.log(self.num_buckets / float(zeros)))
|
239
|
+
|
240
|
+
if raw_estimate <= (1.0/30.0) * (2**32):
|
241
|
+
# No correction needed
|
242
|
+
return int(raw_estimate)
|
243
|
+
else:
|
244
|
+
# Large range correction
|
245
|
+
return int(-1 * (2**32) * math.log(1 - raw_estimate / (2**32)))
|
246
|
+
|
247
|
+
def merge(self, other: 'xHyperLogLogStrategy') -> 'xHyperLogLogStrategy':
    """
    Return a new sketch combining this one with ``other``.

    Neither input is modified. Each bucket of the result holds the larger
    of the two corresponding bucket values. The tracked item sets are
    united, the addition counters are summed, and stored values are copied
    with ``other``'s value winning when both hold the same name.

    Raises:
        ValueError: If the two sketches have different precision.
    """
    if self.precision != other.precision:
        raise ValueError("Cannot merge HyperLogLogs with different precision")

    combined = xHyperLogLogStrategy(
        traits=self._traits,
        precision=self.precision
    )

    # Bucket-wise maximum.
    mine, theirs = self._buckets, other._buckets
    for idx in range(self.num_buckets):
        combined._buckets[idx] = max(mine[idx], theirs[idx])

    # Stored values: this sketch first, then the other one on top.
    for source in (self._values, other._values):
        combined._values.update(source)

    # Exact bookkeeping.
    combined._items_added = self._items_added | other._items_added
    combined._size = len(combined._items_added)
    combined._total_additions = self._total_additions + other._total_additions

    return combined
|
272
|
+
|
273
|
+
def union(self, other: 'xHyperLogLogStrategy') -> int:
    """
    Estimate how many distinct items this sketch and ``other`` hold together.

    Builds a merged sketch and returns its cardinality estimate.
    """
    return self.merge(other).estimate_cardinality()
|
277
|
+
|
278
|
+
def jaccard_similarity(self, other: 'xHyperLogLogStrategy') -> float:
    """
    Estimate the Jaccard similarity between this sketch and ``other``.

    The intersection size is derived by inclusion-exclusion from the three
    estimates: ``|A| + |B| - |A ∪ B|``. A negative ratio is reported as
    ``0.0``. When the union estimate is zero the result is ``1.0`` if both
    individual estimates are zero as well, otherwise ``0.0``.
    """
    size_self = self.estimate_cardinality()
    size_other = other.estimate_cardinality()
    size_union = self.union(other)

    if size_union != 0:
        overlap = size_self + size_other - size_union
        return max(0.0, overlap / size_union)

    # Empty union: two empty sketches count as identical.
    if size_self == 0 and size_other == 0:
        return 1.0
    return 0.0
|
290
|
+
|
291
|
+
def get_bucket_statistics(self) -> Dict[str, Any]:
|
292
|
+
"""Get statistics about bucket distribution."""
|
293
|
+
non_zero = sum(1 for bucket in self._buckets if bucket > 0)
|
294
|
+
max_bucket = max(self._buckets) if self._buckets else 0
|
295
|
+
avg_bucket = sum(self._buckets) / len(self._buckets) if self._buckets else 0
|
296
|
+
|
297
|
+
# Bucket value distribution
|
298
|
+
bucket_dist = {}
|
299
|
+
for value in self._buckets:
|
300
|
+
bucket_dist[value] = bucket_dist.get(value, 0) + 1
|
301
|
+
|
302
|
+
return {
|
303
|
+
'total_buckets': self.num_buckets,
|
304
|
+
'non_zero_buckets': non_zero,
|
305
|
+
'zero_buckets': self.num_buckets - non_zero,
|
306
|
+
'max_bucket_value': max_bucket,
|
307
|
+
'avg_bucket_value': avg_bucket,
|
308
|
+
'bucket_distribution': bucket_dist
|
309
|
+
}
|
310
|
+
|
311
|
+
def get_error_bounds(self) -> Dict[str, float]:
|
312
|
+
"""Get theoretical error bounds."""
|
313
|
+
# Standard error: 1.04 / sqrt(m)
|
314
|
+
standard_error = 1.04 / math.sqrt(self.num_buckets)
|
315
|
+
|
316
|
+
estimate = self.estimate_cardinality()
|
317
|
+
error_margin = estimate * standard_error
|
318
|
+
|
319
|
+
return {
|
320
|
+
'estimate': estimate,
|
321
|
+
'standard_error': standard_error,
|
322
|
+
'error_margin': error_margin,
|
323
|
+
'lower_bound': max(0, estimate - 2 * error_margin),
|
324
|
+
'upper_bound': estimate + 2 * error_margin,
|
325
|
+
'confidence': 0.95 # 95% confidence interval
|
326
|
+
}
|
327
|
+
|
328
|
+
def get_statistics(self) -> Dict[str, Any]:
|
329
|
+
"""Get comprehensive HyperLogLog statistics."""
|
330
|
+
bucket_stats = self.get_bucket_statistics()
|
331
|
+
error_bounds = self.get_error_bounds()
|
332
|
+
|
333
|
+
return {
|
334
|
+
'precision': self.precision,
|
335
|
+
'num_buckets': self.num_buckets,
|
336
|
+
'alpha': self.alpha,
|
337
|
+
'estimated_cardinality': self.estimate_cardinality(),
|
338
|
+
'raw_estimate': self._raw_estimate(),
|
339
|
+
'items_tracked': self._size,
|
340
|
+
'total_additions': self._total_additions,
|
341
|
+
'bucket_stats': bucket_stats,
|
342
|
+
'error_bounds': error_bounds,
|
343
|
+
'memory_usage': self.num_buckets * 1 # 1 byte per bucket
|
344
|
+
}
|
345
|
+
|
346
|
+
def export_state(self) -> Dict[str, Any]:
|
347
|
+
"""Export HyperLogLog state."""
|
348
|
+
return {
|
349
|
+
'precision': self.precision,
|
350
|
+
'buckets': self._buckets.copy(),
|
351
|
+
'alpha': self.alpha,
|
352
|
+
'num_buckets': self.num_buckets,
|
353
|
+
'metadata': {
|
354
|
+
'items_tracked': list(self._items_added),
|
355
|
+
'total_additions': self._total_additions
|
356
|
+
}
|
357
|
+
}
|
358
|
+
|
359
|
+
def import_state(self, state: Dict[str, Any]) -> None:
|
360
|
+
"""Import HyperLogLog state."""
|
361
|
+
self.precision = state['precision']
|
362
|
+
self.num_buckets = state['num_buckets']
|
363
|
+
self.alpha = state['alpha']
|
364
|
+
self._buckets = state['buckets'].copy()
|
365
|
+
|
366
|
+
metadata = state['metadata']
|
367
|
+
self._items_added = set(metadata['items_tracked'])
|
368
|
+
self._total_additions = metadata['total_additions']
|
369
|
+
self._size = len(self._items_added)
|
370
|
+
|
371
|
+
# ============================================================================
|
372
|
+
# PERFORMANCE CHARACTERISTICS
|
373
|
+
# ============================================================================
|
374
|
+
|
375
|
+
@property
|
376
|
+
def backend_info(self) -> Dict[str, Any]:
|
377
|
+
"""Get backend implementation info."""
|
378
|
+
return {
|
379
|
+
'strategy': 'HYPERLOGLOG',
|
380
|
+
'backend': 'Probabilistic cardinality counter with bucket array',
|
381
|
+
'precision': self.precision,
|
382
|
+
'num_buckets': self.num_buckets,
|
383
|
+
'alpha': self.alpha,
|
384
|
+
'complexity': {
|
385
|
+
'add': 'O(1)',
|
386
|
+
'estimate': 'O(m)', # m = num_buckets
|
387
|
+
'merge': 'O(m)',
|
388
|
+
'space': 'O(m)',
|
389
|
+
'standard_error': f'1.04/√{self.num_buckets} ≈ {1.04/math.sqrt(self.num_buckets):.3f}'
|
390
|
+
}
|
391
|
+
}
|
392
|
+
|
393
|
+
@property
|
394
|
+
def metrics(self) -> Dict[str, Any]:
|
395
|
+
"""Get performance metrics."""
|
396
|
+
stats = self.get_statistics()
|
397
|
+
error_bounds = stats['error_bounds']
|
398
|
+
|
399
|
+
return {
|
400
|
+
'estimated_cardinality': stats['estimated_cardinality'],
|
401
|
+
'items_tracked': stats['items_tracked'],
|
402
|
+
'total_additions': stats['total_additions'],
|
403
|
+
'precision_bits': self.precision,
|
404
|
+
'standard_error': f"{error_bounds['standard_error']:.3f}",
|
405
|
+
'error_margin': f"{error_bounds['error_margin']:.1f}",
|
406
|
+
'memory_usage': f"{stats['memory_usage']} bytes"
|
407
|
+
}
|