exonware-xwnode 0.0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. exonware/__init__.py +14 -0
  2. exonware/xwnode/__init__.py +127 -0
  3. exonware/xwnode/base.py +676 -0
  4. exonware/xwnode/config.py +178 -0
  5. exonware/xwnode/contracts.py +730 -0
  6. exonware/xwnode/errors.py +503 -0
  7. exonware/xwnode/facade.py +460 -0
  8. exonware/xwnode/strategies/__init__.py +158 -0
  9. exonware/xwnode/strategies/advisor.py +463 -0
  10. exonware/xwnode/strategies/edges/__init__.py +32 -0
  11. exonware/xwnode/strategies/edges/adj_list.py +227 -0
  12. exonware/xwnode/strategies/edges/adj_matrix.py +391 -0
  13. exonware/xwnode/strategies/edges/base.py +169 -0
  14. exonware/xwnode/strategies/flyweight.py +328 -0
  15. exonware/xwnode/strategies/impls/__init__.py +13 -0
  16. exonware/xwnode/strategies/impls/_base_edge.py +403 -0
  17. exonware/xwnode/strategies/impls/_base_node.py +307 -0
  18. exonware/xwnode/strategies/impls/edge_adj_list.py +353 -0
  19. exonware/xwnode/strategies/impls/edge_adj_matrix.py +445 -0
  20. exonware/xwnode/strategies/impls/edge_bidir_wrapper.py +455 -0
  21. exonware/xwnode/strategies/impls/edge_block_adj_matrix.py +539 -0
  22. exonware/xwnode/strategies/impls/edge_coo.py +533 -0
  23. exonware/xwnode/strategies/impls/edge_csc.py +447 -0
  24. exonware/xwnode/strategies/impls/edge_csr.py +492 -0
  25. exonware/xwnode/strategies/impls/edge_dynamic_adj_list.py +503 -0
  26. exonware/xwnode/strategies/impls/edge_flow_network.py +555 -0
  27. exonware/xwnode/strategies/impls/edge_hyperedge_set.py +516 -0
  28. exonware/xwnode/strategies/impls/edge_neural_graph.py +650 -0
  29. exonware/xwnode/strategies/impls/edge_octree.py +574 -0
  30. exonware/xwnode/strategies/impls/edge_property_store.py +655 -0
  31. exonware/xwnode/strategies/impls/edge_quadtree.py +519 -0
  32. exonware/xwnode/strategies/impls/edge_rtree.py +820 -0
  33. exonware/xwnode/strategies/impls/edge_temporal_edgeset.py +558 -0
  34. exonware/xwnode/strategies/impls/edge_tree_graph_basic.py +271 -0
  35. exonware/xwnode/strategies/impls/edge_weighted_graph.py +411 -0
  36. exonware/xwnode/strategies/manager.py +775 -0
  37. exonware/xwnode/strategies/metrics.py +538 -0
  38. exonware/xwnode/strategies/migration.py +432 -0
  39. exonware/xwnode/strategies/nodes/__init__.py +50 -0
  40. exonware/xwnode/strategies/nodes/_base_node.py +307 -0
  41. exonware/xwnode/strategies/nodes/adjacency_list.py +267 -0
  42. exonware/xwnode/strategies/nodes/aho_corasick.py +345 -0
  43. exonware/xwnode/strategies/nodes/array_list.py +209 -0
  44. exonware/xwnode/strategies/nodes/base.py +247 -0
  45. exonware/xwnode/strategies/nodes/deque.py +200 -0
  46. exonware/xwnode/strategies/nodes/hash_map.py +135 -0
  47. exonware/xwnode/strategies/nodes/heap.py +307 -0
  48. exonware/xwnode/strategies/nodes/linked_list.py +232 -0
  49. exonware/xwnode/strategies/nodes/node_aho_corasick.py +520 -0
  50. exonware/xwnode/strategies/nodes/node_array_list.py +175 -0
  51. exonware/xwnode/strategies/nodes/node_avl_tree.py +371 -0
  52. exonware/xwnode/strategies/nodes/node_b_plus_tree.py +542 -0
  53. exonware/xwnode/strategies/nodes/node_bitmap.py +420 -0
  54. exonware/xwnode/strategies/nodes/node_bitset_dynamic.py +513 -0
  55. exonware/xwnode/strategies/nodes/node_bloom_filter.py +347 -0
  56. exonware/xwnode/strategies/nodes/node_btree.py +357 -0
  57. exonware/xwnode/strategies/nodes/node_count_min_sketch.py +470 -0
  58. exonware/xwnode/strategies/nodes/node_cow_tree.py +473 -0
  59. exonware/xwnode/strategies/nodes/node_cuckoo_hash.py +392 -0
  60. exonware/xwnode/strategies/nodes/node_fenwick_tree.py +301 -0
  61. exonware/xwnode/strategies/nodes/node_hash_map.py +269 -0
  62. exonware/xwnode/strategies/nodes/node_heap.py +191 -0
  63. exonware/xwnode/strategies/nodes/node_hyperloglog.py +407 -0
  64. exonware/xwnode/strategies/nodes/node_linked_list.py +409 -0
  65. exonware/xwnode/strategies/nodes/node_lsm_tree.py +400 -0
  66. exonware/xwnode/strategies/nodes/node_ordered_map.py +390 -0
  67. exonware/xwnode/strategies/nodes/node_ordered_map_balanced.py +565 -0
  68. exonware/xwnode/strategies/nodes/node_patricia.py +512 -0
  69. exonware/xwnode/strategies/nodes/node_persistent_tree.py +378 -0
  70. exonware/xwnode/strategies/nodes/node_radix_trie.py +452 -0
  71. exonware/xwnode/strategies/nodes/node_red_black_tree.py +497 -0
  72. exonware/xwnode/strategies/nodes/node_roaring_bitmap.py +570 -0
  73. exonware/xwnode/strategies/nodes/node_segment_tree.py +289 -0
  74. exonware/xwnode/strategies/nodes/node_set_hash.py +354 -0
  75. exonware/xwnode/strategies/nodes/node_set_tree.py +480 -0
  76. exonware/xwnode/strategies/nodes/node_skip_list.py +316 -0
  77. exonware/xwnode/strategies/nodes/node_splay_tree.py +393 -0
  78. exonware/xwnode/strategies/nodes/node_suffix_array.py +487 -0
  79. exonware/xwnode/strategies/nodes/node_treap.py +387 -0
  80. exonware/xwnode/strategies/nodes/node_tree_graph_hybrid.py +1434 -0
  81. exonware/xwnode/strategies/nodes/node_trie.py +252 -0
  82. exonware/xwnode/strategies/nodes/node_union_find.py +187 -0
  83. exonware/xwnode/strategies/nodes/node_xdata_optimized.py +369 -0
  84. exonware/xwnode/strategies/nodes/priority_queue.py +209 -0
  85. exonware/xwnode/strategies/nodes/queue.py +161 -0
  86. exonware/xwnode/strategies/nodes/sparse_matrix.py +206 -0
  87. exonware/xwnode/strategies/nodes/stack.py +152 -0
  88. exonware/xwnode/strategies/nodes/trie.py +274 -0
  89. exonware/xwnode/strategies/nodes/union_find.py +283 -0
  90. exonware/xwnode/strategies/pattern_detector.py +603 -0
  91. exonware/xwnode/strategies/performance_monitor.py +487 -0
  92. exonware/xwnode/strategies/queries/__init__.py +24 -0
  93. exonware/xwnode/strategies/queries/base.py +236 -0
  94. exonware/xwnode/strategies/queries/cql.py +201 -0
  95. exonware/xwnode/strategies/queries/cypher.py +181 -0
  96. exonware/xwnode/strategies/queries/datalog.py +70 -0
  97. exonware/xwnode/strategies/queries/elastic_dsl.py +70 -0
  98. exonware/xwnode/strategies/queries/eql.py +70 -0
  99. exonware/xwnode/strategies/queries/flux.py +70 -0
  100. exonware/xwnode/strategies/queries/gql.py +70 -0
  101. exonware/xwnode/strategies/queries/graphql.py +240 -0
  102. exonware/xwnode/strategies/queries/gremlin.py +181 -0
  103. exonware/xwnode/strategies/queries/hiveql.py +214 -0
  104. exonware/xwnode/strategies/queries/hql.py +70 -0
  105. exonware/xwnode/strategies/queries/jmespath.py +219 -0
  106. exonware/xwnode/strategies/queries/jq.py +66 -0
  107. exonware/xwnode/strategies/queries/json_query.py +66 -0
  108. exonware/xwnode/strategies/queries/jsoniq.py +248 -0
  109. exonware/xwnode/strategies/queries/kql.py +70 -0
  110. exonware/xwnode/strategies/queries/linq.py +238 -0
  111. exonware/xwnode/strategies/queries/logql.py +70 -0
  112. exonware/xwnode/strategies/queries/mql.py +68 -0
  113. exonware/xwnode/strategies/queries/n1ql.py +210 -0
  114. exonware/xwnode/strategies/queries/partiql.py +70 -0
  115. exonware/xwnode/strategies/queries/pig.py +215 -0
  116. exonware/xwnode/strategies/queries/promql.py +70 -0
  117. exonware/xwnode/strategies/queries/sparql.py +220 -0
  118. exonware/xwnode/strategies/queries/sql.py +275 -0
  119. exonware/xwnode/strategies/queries/xml_query.py +66 -0
  120. exonware/xwnode/strategies/queries/xpath.py +223 -0
  121. exonware/xwnode/strategies/queries/xquery.py +258 -0
  122. exonware/xwnode/strategies/queries/xwnode_executor.py +332 -0
  123. exonware/xwnode/strategies/queries/xwquery_strategy.py +424 -0
  124. exonware/xwnode/strategies/registry.py +604 -0
  125. exonware/xwnode/strategies/simple.py +273 -0
  126. exonware/xwnode/strategies/utils.py +532 -0
  127. exonware/xwnode/types.py +912 -0
  128. exonware/xwnode/version.py +78 -0
  129. exonware_xwnode-0.0.1.12.dist-info/METADATA +169 -0
  130. exonware_xwnode-0.0.1.12.dist-info/RECORD +132 -0
  131. exonware_xwnode-0.0.1.12.dist-info/WHEEL +4 -0
  132. exonware_xwnode-0.0.1.12.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,470 @@
1
+ """
2
+ Count-Min Sketch Node Strategy Implementation
3
+
4
+ This module implements the COUNT_MIN_SKETCH strategy for probabilistic
5
+ frequency estimation in data streams with bounded error guarantees.
6
+ """
7
+
8
+ from typing import Any, Iterator, List, Dict, Optional, Tuple
9
+ import hashlib
10
+ import math
11
+ from ._base_node import aNodeStrategy
12
+ from ...types import NodeMode, NodeTrait
13
+
14
+
15
class xCountMinSketchStrategy(aNodeStrategy):
    """
    Count-Min Sketch node strategy for streaming frequency estimation.

    The sketch is a ``depth x width`` matrix of counters. Every update adds
    the item's count to one counter per row (chosen by a per-row seeded
    hash); a query returns the minimum of those counters. Estimates can
    therefore be too high (hash collisions) but not too low, provided
    counters are only ever incremented.

    Besides the sketch itself, the strategy keeps exact side tables
    (``_values``, ``_unique_items``, ``_heavy_hitters``) so it can offer the
    dict-like node interface. These side tables grow with the number of
    distinct items.

    Options (passed as keyword arguments):
        epsilon: Relative error bound used to size the width (default 0.01).
        delta: Failure probability used to size the depth (default 0.01).
        track_heavy_hitters: Whether to maintain the heavy-hitter table
            (default True).
        heavy_hitter_threshold: Fraction of the total count an item must
            reach to be recorded as a heavy hitter (default 0.01).
    """

    # Reserved keys that expose sketch metadata through get()/has()/keys().
    # They shadow real items that happen to share the same name.
    _SPECIAL_KEYS = (
        "total_count",
        "unique_items",
        "heavy_hitters",
        "sketch_info",
        "estimated_count",
    )

    # Primes used to derive one deterministic hash seed per sketch row.
    _SEED_PRIMES = (
        2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
        31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
    )

    def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
        """Initialize the Count-Min Sketch strategy.

        Raises:
            ValueError: If ``epsilon`` or ``delta`` is not greater than 0.
        """
        super().__init__(NodeMode.COUNT_MIN_SKETCH, traits, **options)

        # Sketch parameters
        self.epsilon = options.get('epsilon', 0.01)  # Error bound (1%)
        self.delta = options.get('delta', 0.01)  # Failure probability (1%)

        # Sketch dimensions derived from the parameters above
        self.width = self._calculate_width()
        self.depth = self._calculate_depth()

        # Core sketch matrix: depth rows of width counters
        self._sketch: List[List[int]] = self._new_matrix()

        # One hash seed per row
        self._hash_seeds = self._generate_hash_seeds()

        # Exact side tables kept for the key-value interface
        self._values: Dict[str, Any] = {}
        self._total_count = 0
        self._unique_items = set()
        self._size = 0

        # Heavy hitters tracking
        self.track_heavy_hitters = options.get('track_heavy_hitters', True)
        self.heavy_hitter_threshold = options.get('heavy_hitter_threshold', 0.01)  # 1% of total
        self._heavy_hitters: Dict[str, int] = {}

    def get_supported_traits(self) -> NodeTrait:
        """Get the traits supported by the count-min sketch strategy."""
        return (NodeTrait.PROBABILISTIC | NodeTrait.COMPRESSED | NodeTrait.STREAMING)

    def _new_matrix(self) -> List[List[int]]:
        """Return a zero-filled ``depth x width`` counter matrix."""
        return [[0] * self.width for _ in range(self.depth)]

    def _calculate_width(self) -> int:
        """Calculate sketch width as ``ceil(e / epsilon)`` (at least 1).

        Raises:
            ValueError: If ``epsilon`` is not greater than 0.
        """
        if not self.epsilon > 0:
            raise ValueError("epsilon must be greater than 0")
        return max(1, int(math.ceil(math.e / self.epsilon)))

    def _calculate_depth(self) -> int:
        """Calculate sketch depth as ``ceil(ln(1 / delta))`` (at least 1).

        Raises:
            ValueError: If ``delta`` is not greater than 0.
        """
        if not self.delta > 0:
            raise ValueError("delta must be greater than 0")
        # delta >= 1 gives a non-positive logarithm; it is clamped to one row.
        return max(1, int(math.ceil(math.log(1.0 / self.delta))))

    def _generate_hash_seeds(self) -> List[int]:
        """Generate one deterministic seed per sketch row.

        The formula must stay stable: sketches can only be merged or
        re-imported when their seeds match.
        """
        primes = self._SEED_PRIMES
        return [
            primes[row % len(primes)] * (row + 1) * 1000 + row
            for row in range(self.depth)
        ]

    def _hash_item(self, item: str, seed: int) -> int:
        """Hash item to a bucket index in ``[0, width)`` using the given seed."""
        # MD5 is used purely for bucketing, not for security; flagging it as
        # such keeps it usable on builds that restrict MD5.
        digest = hashlib.md5(f"{item}{seed}".encode(), usedforsecurity=False).hexdigest()
        return int(digest, 16) % self.width

    def _row_buckets(self, item: str) -> List[int]:
        """Return the bucket index of ``item`` for every row, in row order."""
        return [self._hash_item(item, self._hash_seeds[row]) for row in range(self.depth)]

    def _update_heavy_hitters(self, item: str, estimated_count: int) -> None:
        """Record or drop ``item`` in the heavy-hitter table.

        An item is a heavy hitter when its estimate is positive and at least
        ``heavy_hitter_threshold`` times the current total count. Only the
        given item is re-evaluated; other entries are not re-checked as the
        total grows.
        """
        if not self.track_heavy_hitters:
            return

        threshold = self._total_count * self.heavy_hitter_threshold

        # The positivity check stops zero-count items being recorded when
        # the total (and therefore the threshold) is 0.
        if estimated_count > 0 and estimated_count >= threshold:
            self._heavy_hitters[item] = estimated_count
        else:
            self._heavy_hitters.pop(item, None)

    # ============================================================================
    # CORE OPERATIONS (Key-based interface for compatibility)
    # ============================================================================

    def put(self, key: Any, value: Any = None) -> None:
        """Add item to count-min sketch.

        Args:
            key: Item to count; it is tracked under ``str(key)``.
            value: If numeric and at least 1, used (truncated to int) as the
                number of occurrences to add; otherwise one occurrence is
                added. The raw value (or the count when ``value`` is None)
                is also stored for retrieval through ``get``.
        """
        item = str(key)

        # Numeric values >= 1 act as counts. Smaller or non-numeric values
        # count as a single occurrence so an item is never tracked with a
        # zero increment.
        count = 1
        if isinstance(value, (int, float)) and value >= 1:
            count = int(value)

        # Update sketch
        for row, bucket in enumerate(self._row_buckets(item)):
            self._sketch[row][bucket] += count

        # Update tracking; the size is derived from the exact item set so it
        # cannot drift from it.
        self._total_count += count
        self._unique_items.add(item)
        self._size = len(self._unique_items)

        # Store value
        self._values[item] = value if value is not None else count

        # Update heavy hitters
        self._update_heavy_hitters(item, self.estimate_count(item))

    def get(self, key: Any, default: Any = None) -> Any:
        """Get sketch metadata, a stored value, or an estimated count.

        Reserved keys (``total_count``, ``unique_items``, ``heavy_hitters``,
        ``sketch_info``, ``estimated_count``) return metadata; the last one
        returns a callable that estimates any item. For other keys the
        stored value is returned when present, otherwise the estimated count
        (0 for unseen items). ``default`` is accepted for interface
        compatibility but is not used.
        """
        if key == "total_count":
            return self._total_count
        if key == "unique_items":
            return len(self._unique_items)
        if key == "heavy_hitters":
            return dict(self._heavy_hitters)
        if key == "sketch_info":
            return {
                'width': self.width,
                'depth': self.depth,
                'epsilon': self.epsilon,
                'delta': self.delta,
                'total_count': self._total_count
            }
        if key == "estimated_count":
            # Return function to estimate any item
            return self.estimate_count

        item = str(key)
        if item in self._values:
            return self._values[item]
        return self.estimate_count(item)

    def has(self, key: Any) -> bool:
        """Check if item might exist (probabilistic).

        Reserved metadata keys always exist; any other key exists when its
        estimated count is greater than 0, so collisions can yield false
        positives.
        """
        if key in self._SPECIAL_KEYS:
            return True
        return self.estimate_count(str(key)) > 0

    def remove(self, key: Any) -> bool:
        """Remove one occurrence of an item (limited support).

        Only items with a stored value can be removed. The item's counters
        are decremented by 1, but only when all of them are positive: if any
        counter is already 0, the remaining counters belong to other items
        and lowering them would make those items' estimates too low.

        Returns:
            True if the item was known to the strategy, False otherwise.
        """
        item = str(key)
        if item not in self._values:
            return False

        buckets = self._row_buckets(item)
        if all(self._sketch[row][bucket] > 0 for row, bucket in enumerate(buckets)):
            for row, bucket in enumerate(buckets):
                self._sketch[row][bucket] -= 1
            self._total_count = max(0, self._total_count - 1)

        remaining = self.estimate_count(item)
        if remaining == 0:
            # Nothing left: drop the item from every side table
            del self._values[item]
            self._unique_items.discard(item)
            self._heavy_hitters.pop(item, None)
            self._size = len(self._unique_items)
        else:
            self._update_heavy_hitters(item, remaining)

        return True

    def delete(self, key: Any) -> bool:
        """Remove item (alias for remove)."""
        return self.remove(key)

    def clear(self) -> None:
        """Clear all data while keeping the sketch dimensions and seeds."""
        self._sketch = self._new_matrix()
        self._values.clear()
        self._unique_items.clear()
        self._heavy_hitters.clear()
        self._total_count = 0
        self._size = 0

    def keys(self) -> Iterator[str]:
        """Get all tracked items followed by the reserved metadata keys."""
        yield from self._unique_items
        yield from self._SPECIAL_KEYS

    def values(self) -> Iterator[Any]:
        """Get estimated counts for tracked items, then the metadata values."""
        for item in self._unique_items:
            yield self.estimate_count(item)
        yield self._total_count
        yield len(self._unique_items)
        yield dict(self._heavy_hitters)
        yield self.get("sketch_info")
        yield self.get("estimated_count")

    def items(self) -> Iterator[tuple[str, Any]]:
        """Get all item-count pairs followed by the metadata pairs."""
        for item in self._unique_items:
            yield (item, self.estimate_count(item))
        yield ("total_count", self._total_count)
        yield ("unique_items", len(self._unique_items))
        yield ("heavy_hitters", dict(self._heavy_hitters))
        yield ("sketch_info", self.get("sketch_info"))
        yield ("estimated_count", self.get("estimated_count"))

    def __len__(self) -> int:
        """Get number of unique items tracked."""
        return self._size

    def to_native(self) -> Dict[str, Any]:
        """Convert to native Python dict.

        Contains the estimated count of every tracked item plus the
        ``total_count``, ``unique_items``, ``heavy_hitters`` and
        ``sketch_info`` metadata entries (the ``estimated_count`` callable
        is omitted).
        """
        result: Dict[str, Any] = {
            item: self.estimate_count(item) for item in self._unique_items
        }
        result.update({
            "total_count": self._total_count,
            "unique_items": len(self._unique_items),
            "heavy_hitters": dict(self._heavy_hitters),
            "sketch_info": self.get("sketch_info")
        })
        return result

    @property
    def is_list(self) -> bool:
        """This is not a list strategy."""
        return False

    @property
    def is_dict(self) -> bool:
        """This behaves like a dict with probabilistic semantics."""
        return True

    # ============================================================================
    # COUNT-MIN SKETCH SPECIFIC OPERATIONS
    # ============================================================================

    def estimate_count(self, item: str) -> int:
        """Estimate count of item.

        The item is converted with ``str`` so that lookups match the keys
        used by ``put``. ``None`` and the empty string yield 0.

        Returns:
            The minimum of the item's counters across all rows.
        """
        if item is None:
            return 0
        item = str(item)
        if not item:
            return 0

        buckets = self._row_buckets(item)
        if not buckets:
            return 0
        return int(min(self._sketch[row][bucket] for row, bucket in enumerate(buckets)))

    def increment(self, item: str, count: int = 1) -> None:
        """Increment count for item.

        A count of 0 is a no-op.

        Raises:
            ValueError: If ``count`` is negative.
        """
        if count < 0:
            raise ValueError("count must be non-negative")
        if count == 0:
            return
        self.put(item, count)

    def get_frequent_items(self, threshold: Optional[int] = None) -> List[Tuple[str, int]]:
        """Get items above frequency threshold.

        Args:
            threshold: Minimum estimated count. Defaults to
                ``heavy_hitter_threshold`` times the total count (at least 1).

        Returns:
            ``(item, estimated_count)`` pairs sorted by count, descending.
        """
        if threshold is None:
            threshold = max(1, int(self._total_count * self.heavy_hitter_threshold))

        estimates = ((item, self.estimate_count(item)) for item in self._unique_items)
        frequent = [pair for pair in estimates if pair[1] >= threshold]

        # Sort by frequency (descending)
        frequent.sort(key=lambda pair: pair[1], reverse=True)
        return frequent

    def get_top_k(self, k: int) -> List[Tuple[str, int]]:
        """Get top-k most frequent items (empty list when ``k <= 0``)."""
        if k <= 0:
            return []
        ranked = [(item, self.estimate_count(item)) for item in self._unique_items]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[:k]

    def merge(self, other: 'xCountMinSketchStrategy') -> 'xCountMinSketchStrategy':
        """Merge with another Count-Min Sketch.

        Returns a new sketch whose counters are the cell-wise sums of both
        inputs; neither input is modified. When both sketches hold a value
        for the same item, this sketch's value wins.

        Raises:
            TypeError: If ``other`` is not a Count-Min Sketch strategy.
            ValueError: If width, depth or hash seeds differ.
        """
        if not isinstance(other, xCountMinSketchStrategy):
            raise TypeError("Can only merge with another xCountMinSketchStrategy")
        if (self.width != other.width or self.depth != other.depth or
                self._hash_seeds != other._hash_seeds):
            raise ValueError("Cannot merge sketches with different parameters")

        # Create new sketch
        merged = xCountMinSketchStrategy(
            traits=self._traits,
            epsilon=self.epsilon,
            delta=self.delta,
            track_heavy_hitters=self.track_heavy_hitters,
            heavy_hitter_threshold=self.heavy_hitter_threshold
        )

        # Copy the actual geometry and seeds: after import_sketch they may
        # differ from what epsilon/delta alone would produce.
        merged.width = self.width
        merged.depth = self.depth
        merged._hash_seeds = list(self._hash_seeds)

        # Merge sketch matrices
        merged._sketch = [
            [mine + theirs for mine, theirs in zip(own_row, other_row)]
            for own_row, other_row in zip(self._sketch, other._sketch)
        ]

        # Merge metadata
        merged._total_count = self._total_count + other._total_count
        merged._unique_items = self._unique_items | other._unique_items
        merged._size = len(merged._unique_items)

        # Merge values (prefer this sketch's values)
        merged._values.update(other._values)
        merged._values.update(self._values)

        # Recompute heavy hitters
        for item in merged._unique_items:
            merged._update_heavy_hitters(item, merged.estimate_count(item))

        return merged

    def get_error_bounds(self, item: str) -> Tuple[int, int, float]:
        """Get error bounds for item count estimate.

        A Count-Min Sketch never underestimates, and overestimates by at
        most ``epsilon * total_count`` with probability ``1 - delta``. The
        true count therefore lies between ``estimate - epsilon * total_count``
        (floored at 0) and ``estimate``.

        Returns:
            ``(lower_bound, upper_bound, confidence)`` for the true count.
        """
        estimate = self.estimate_count(item)
        max_error = int(self.epsilon * self._total_count)
        confidence = 1.0 - self.delta

        return max(0, estimate - max_error), estimate, confidence

    def point_query(self, item: str) -> Dict[str, Any]:
        """Comprehensive point query with error analysis."""
        estimate = self.estimate_count(item)
        lower_bound, upper_bound, confidence = self.get_error_bounds(item)

        return {
            'item': item,
            'estimated_count': estimate,
            'lower_bound': lower_bound,
            'upper_bound': upper_bound,
            'confidence': confidence,
            'relative_frequency': estimate / max(1, self._total_count),
            'is_heavy_hitter': item in self._heavy_hitters
        }

    def range_query(self, items: List[str]) -> int:
        """Estimate total count for a range of items."""
        # Simple sum - can lead to overestimation due to hash collisions
        return sum(self.estimate_count(item) for item in items)

    def get_sketch_statistics(self) -> Dict[str, Any]:
        """Get comprehensive sketch statistics.

        ``avg_bucket_count`` is the mean counter value over the whole
        matrix. ``memory_usage`` is a nominal figure assuming 4 bytes per
        counter, not the actual footprint of the Python lists.
        """
        total_cells = self.width * self.depth
        non_zero_cells = sum(1 for row in self._sketch for cell in row if cell > 0)
        density = non_zero_cells / total_cells if total_cells > 0 else 0

        # Every update touches one cell per row, so the mean is taken over
        # the stored counters rather than total_count / total_cells.
        max_bucket_count = max((max(row) for row in self._sketch if row), default=0)
        cell_sum = sum(sum(row) for row in self._sketch)
        avg_bucket_count = cell_sum / total_cells if total_cells > 0 else 0

        return {
            'width': self.width,
            'depth': self.depth,
            'total_cells': total_cells,
            'non_zero_cells': non_zero_cells,
            'density': density,
            'total_count': self._total_count,
            'unique_items': len(self._unique_items),
            'heavy_hitters': len(self._heavy_hitters),
            'max_bucket_count': max_bucket_count,
            'avg_bucket_count': avg_bucket_count,
            'theoretical_error_bound': self.epsilon,
            'theoretical_confidence': 1.0 - self.delta,
            'memory_usage': total_cells * 4  # 4 bytes per int
        }

    def export_sketch(self) -> Dict[str, Any]:
        """Export sketch for analysis or persistence.

        The result holds copies of the counter matrix, the sketch parameters
        (including hash seeds) and the tracking metadata. Stored per-item
        values are not exported.
        """
        return {
            'sketch_matrix': [row.copy() for row in self._sketch],
            'parameters': {
                'width': self.width,
                'depth': self.depth,
                'epsilon': self.epsilon,
                'delta': self.delta,
                'hash_seeds': self._hash_seeds.copy()
            },
            'metadata': {
                'total_count': self._total_count,
                'unique_items': list(self._unique_items),
                'heavy_hitters': dict(self._heavy_hitters)
            }
        }

    def import_sketch(self, sketch_data: Dict[str, Any]) -> None:
        """Import sketch from exported data.

        The data is read and validated in full before any state is replaced,
        so a failed import leaves the sketch untouched. Stored per-item
        values are not part of the export; they are rebuilt from the
        imported counters so imported items behave like items added with
        ``put``.

        Raises:
            KeyError: If a required entry is missing from ``sketch_data``.
            ValueError: If the matrix or seed list does not match the
                declared width and depth.
        """
        params = sketch_data['parameters']
        metadata = sketch_data['metadata']

        matrix = [list(row) for row in sketch_data['sketch_matrix']]
        width = params['width']
        depth = params['depth']
        epsilon = params['epsilon']
        delta = params['delta']
        seeds = list(params['hash_seeds'])

        total_count = metadata['total_count']
        unique_items = set(metadata['unique_items'])
        heavy_hitters = dict(metadata['heavy_hitters'])

        if (len(matrix) != depth or len(seeds) != depth or
                any(len(row) != width for row in matrix)):
            raise ValueError("Sketch data is inconsistent with its declared parameters")

        self._sketch = matrix
        self.width = width
        self.depth = depth
        self.epsilon = epsilon
        self.delta = delta
        self._hash_seeds = seeds

        self._total_count = total_count
        self._unique_items = unique_items
        self._heavy_hitters = heavy_hitters
        self._size = len(self._unique_items)

        # Replace stale values from before the import with the estimates of
        # the imported items.
        self._values = {item: self.estimate_count(item) for item in self._unique_items}

    # ============================================================================
    # PERFORMANCE CHARACTERISTICS
    # ============================================================================

    @property
    def backend_info(self) -> Dict[str, Any]:
        """Get backend implementation info."""
        return {
            'strategy': 'COUNT_MIN_SKETCH',
            'backend': 'Probabilistic frequency counter with hash matrix',
            'width': self.width,
            'depth': self.depth,
            'epsilon': self.epsilon,
            'delta': self.delta,
            'track_heavy_hitters': self.track_heavy_hitters,
            'complexity': {
                'update': 'O(d)',  # d = depth
                'query': 'O(d)',
                'space': 'O(w * d)',  # w = width, d = depth
                'merge': 'O(w * d)',
                'error_bound': f'ε * ||f||₁ with probability ≥ {1.0 - self.delta}'
            }
        }

    @property
    def metrics(self) -> Dict[str, Any]:
        """Get performance metrics as display-ready values."""
        stats = self.get_sketch_statistics()

        return {
            'total_count': stats['total_count'],
            'unique_items': stats['unique_items'],
            'sketch_density': f"{stats['density'] * 100:.1f}%",
            'heavy_hitters': stats['heavy_hitters'],
            'error_bound': f"{self.epsilon * 100:.2f}%",
            'confidence': f"{(1.0 - self.delta) * 100:.1f}%",
            'memory_usage': f"{stats['memory_usage']} bytes"
        }