exonware-xwnode 0.0.1.21__py3-none-any.whl → 0.0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- exonware/__init__.py +8 -1
- exonware/xwnode/__init__.py +18 -5
- exonware/xwnode/add_strategy_types.py +165 -0
- exonware/xwnode/base.py +7 -5
- exonware/xwnode/common/__init__.py +1 -1
- exonware/xwnode/common/graph/__init__.py +30 -0
- exonware/xwnode/common/graph/caching.py +131 -0
- exonware/xwnode/common/graph/contracts.py +100 -0
- exonware/xwnode/common/graph/errors.py +44 -0
- exonware/xwnode/common/graph/indexing.py +260 -0
- exonware/xwnode/common/graph/manager.py +568 -0
- exonware/xwnode/common/management/__init__.py +3 -5
- exonware/xwnode/common/management/manager.py +9 -9
- exonware/xwnode/common/management/migration.py +6 -6
- exonware/xwnode/common/monitoring/__init__.py +3 -5
- exonware/xwnode/common/monitoring/metrics.py +7 -3
- exonware/xwnode/common/monitoring/pattern_detector.py +2 -2
- exonware/xwnode/common/monitoring/performance_monitor.py +6 -2
- exonware/xwnode/common/patterns/__init__.py +3 -5
- exonware/xwnode/common/patterns/advisor.py +1 -1
- exonware/xwnode/common/patterns/flyweight.py +6 -2
- exonware/xwnode/common/patterns/registry.py +203 -184
- exonware/xwnode/common/utils/__init__.py +25 -11
- exonware/xwnode/common/utils/simple.py +1 -1
- exonware/xwnode/config.py +3 -8
- exonware/xwnode/contracts.py +4 -105
- exonware/xwnode/defs.py +413 -159
- exonware/xwnode/edges/strategies/__init__.py +86 -4
- exonware/xwnode/edges/strategies/_base_edge.py +2 -2
- exonware/xwnode/edges/strategies/adj_list.py +287 -121
- exonware/xwnode/edges/strategies/adj_matrix.py +316 -222
- exonware/xwnode/edges/strategies/base.py +1 -1
- exonware/xwnode/edges/strategies/{edge_bidir_wrapper.py → bidir_wrapper.py} +45 -4
- exonware/xwnode/edges/strategies/bitemporal.py +520 -0
- exonware/xwnode/edges/strategies/{edge_block_adj_matrix.py → block_adj_matrix.py} +77 -6
- exonware/xwnode/edges/strategies/bv_graph.py +664 -0
- exonware/xwnode/edges/strategies/compressed_graph.py +217 -0
- exonware/xwnode/edges/strategies/{edge_coo.py → coo.py} +46 -4
- exonware/xwnode/edges/strategies/{edge_csc.py → csc.py} +45 -4
- exonware/xwnode/edges/strategies/{edge_csr.py → csr.py} +94 -12
- exonware/xwnode/edges/strategies/{edge_dynamic_adj_list.py → dynamic_adj_list.py} +46 -4
- exonware/xwnode/edges/strategies/edge_list.py +168 -0
- exonware/xwnode/edges/strategies/edge_property_store.py +2 -2
- exonware/xwnode/edges/strategies/euler_tour.py +560 -0
- exonware/xwnode/edges/strategies/{edge_flow_network.py → flow_network.py} +2 -2
- exonware/xwnode/edges/strategies/graphblas.py +449 -0
- exonware/xwnode/edges/strategies/hnsw.py +637 -0
- exonware/xwnode/edges/strategies/hop2_labels.py +467 -0
- exonware/xwnode/edges/strategies/{edge_hyperedge_set.py → hyperedge_set.py} +2 -2
- exonware/xwnode/edges/strategies/incidence_matrix.py +250 -0
- exonware/xwnode/edges/strategies/k2_tree.py +613 -0
- exonware/xwnode/edges/strategies/link_cut.py +626 -0
- exonware/xwnode/edges/strategies/multiplex.py +532 -0
- exonware/xwnode/edges/strategies/{edge_neural_graph.py → neural_graph.py} +2 -2
- exonware/xwnode/edges/strategies/{edge_octree.py → octree.py} +69 -11
- exonware/xwnode/edges/strategies/{edge_quadtree.py → quadtree.py} +66 -10
- exonware/xwnode/edges/strategies/roaring_adj.py +438 -0
- exonware/xwnode/edges/strategies/{edge_rtree.py → rtree.py} +43 -5
- exonware/xwnode/edges/strategies/{edge_temporal_edgeset.py → temporal_edgeset.py} +24 -5
- exonware/xwnode/edges/strategies/{edge_tree_graph_basic.py → tree_graph_basic.py} +78 -7
- exonware/xwnode/edges/strategies/{edge_weighted_graph.py → weighted_graph.py} +188 -10
- exonware/xwnode/errors.py +3 -6
- exonware/xwnode/facade.py +20 -20
- exonware/xwnode/nodes/strategies/__init__.py +29 -9
- exonware/xwnode/nodes/strategies/adjacency_list.py +650 -177
- exonware/xwnode/nodes/strategies/aho_corasick.py +358 -183
- exonware/xwnode/nodes/strategies/array_list.py +36 -3
- exonware/xwnode/nodes/strategies/art.py +581 -0
- exonware/xwnode/nodes/strategies/{node_avl_tree.py → avl_tree.py} +77 -6
- exonware/xwnode/nodes/strategies/{node_b_plus_tree.py → b_plus_tree.py} +81 -40
- exonware/xwnode/nodes/strategies/{node_btree.py → b_tree.py} +79 -9
- exonware/xwnode/nodes/strategies/base.py +469 -98
- exonware/xwnode/nodes/strategies/{node_bitmap.py → bitmap.py} +12 -12
- exonware/xwnode/nodes/strategies/{node_bitset_dynamic.py → bitset_dynamic.py} +11 -11
- exonware/xwnode/nodes/strategies/{node_bloom_filter.py → bloom_filter.py} +15 -2
- exonware/xwnode/nodes/strategies/bloomier_filter.py +519 -0
- exonware/xwnode/nodes/strategies/bw_tree.py +531 -0
- exonware/xwnode/nodes/strategies/contracts.py +1 -1
- exonware/xwnode/nodes/strategies/{node_count_min_sketch.py → count_min_sketch.py} +3 -2
- exonware/xwnode/nodes/strategies/{node_cow_tree.py → cow_tree.py} +135 -13
- exonware/xwnode/nodes/strategies/crdt_map.py +629 -0
- exonware/xwnode/nodes/strategies/{node_cuckoo_hash.py → cuckoo_hash.py} +2 -2
- exonware/xwnode/nodes/strategies/{node_xdata_optimized.py → data_interchange_optimized.py} +21 -4
- exonware/xwnode/nodes/strategies/dawg.py +876 -0
- exonware/xwnode/nodes/strategies/deque.py +321 -153
- exonware/xwnode/nodes/strategies/extendible_hash.py +93 -0
- exonware/xwnode/nodes/strategies/{node_fenwick_tree.py → fenwick_tree.py} +111 -19
- exonware/xwnode/nodes/strategies/hamt.py +403 -0
- exonware/xwnode/nodes/strategies/hash_map.py +354 -67
- exonware/xwnode/nodes/strategies/heap.py +105 -5
- exonware/xwnode/nodes/strategies/hopscotch_hash.py +525 -0
- exonware/xwnode/nodes/strategies/{node_hyperloglog.py → hyperloglog.py} +6 -5
- exonware/xwnode/nodes/strategies/interval_tree.py +742 -0
- exonware/xwnode/nodes/strategies/kd_tree.py +703 -0
- exonware/xwnode/nodes/strategies/learned_index.py +533 -0
- exonware/xwnode/nodes/strategies/linear_hash.py +93 -0
- exonware/xwnode/nodes/strategies/linked_list.py +316 -119
- exonware/xwnode/nodes/strategies/{node_lsm_tree.py → lsm_tree.py} +219 -15
- exonware/xwnode/nodes/strategies/masstree.py +130 -0
- exonware/xwnode/nodes/strategies/{node_persistent_tree.py → persistent_tree.py} +149 -9
- exonware/xwnode/nodes/strategies/priority_queue.py +544 -132
- exonware/xwnode/nodes/strategies/queue.py +249 -120
- exonware/xwnode/nodes/strategies/{node_red_black_tree.py → red_black_tree.py} +183 -72
- exonware/xwnode/nodes/strategies/{node_roaring_bitmap.py → roaring_bitmap.py} +19 -6
- exonware/xwnode/nodes/strategies/rope.py +717 -0
- exonware/xwnode/nodes/strategies/{node_segment_tree.py → segment_tree.py} +106 -106
- exonware/xwnode/nodes/strategies/{node_set_hash.py → set_hash.py} +30 -29
- exonware/xwnode/nodes/strategies/{node_skip_list.py → skip_list.py} +74 -6
- exonware/xwnode/nodes/strategies/sparse_matrix.py +427 -131
- exonware/xwnode/nodes/strategies/{node_splay_tree.py → splay_tree.py} +55 -6
- exonware/xwnode/nodes/strategies/stack.py +244 -112
- exonware/xwnode/nodes/strategies/{node_suffix_array.py → suffix_array.py} +5 -1
- exonware/xwnode/nodes/strategies/t_tree.py +94 -0
- exonware/xwnode/nodes/strategies/{node_treap.py → treap.py} +75 -6
- exonware/xwnode/nodes/strategies/{node_tree_graph_hybrid.py → tree_graph_hybrid.py} +46 -5
- exonware/xwnode/nodes/strategies/trie.py +153 -9
- exonware/xwnode/nodes/strategies/union_find.py +111 -5
- exonware/xwnode/nodes/strategies/veb_tree.py +856 -0
- exonware/xwnode/strategies/__init__.py +5 -51
- exonware/xwnode/version.py +3 -3
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/METADATA +23 -3
- exonware_xwnode-0.0.1.23.dist-info/RECORD +130 -0
- exonware/xwnode/edges/strategies/edge_adj_list.py +0 -353
- exonware/xwnode/edges/strategies/edge_adj_matrix.py +0 -445
- exonware/xwnode/nodes/strategies/_base_node.py +0 -307
- exonware/xwnode/nodes/strategies/node_aho_corasick.py +0 -525
- exonware/xwnode/nodes/strategies/node_array_list.py +0 -179
- exonware/xwnode/nodes/strategies/node_hash_map.py +0 -273
- exonware/xwnode/nodes/strategies/node_heap.py +0 -196
- exonware/xwnode/nodes/strategies/node_linked_list.py +0 -413
- exonware/xwnode/nodes/strategies/node_trie.py +0 -257
- exonware/xwnode/nodes/strategies/node_union_find.py +0 -192
- exonware/xwnode/queries/executors/__init__.py +0 -47
- exonware/xwnode/queries/executors/advanced/__init__.py +0 -37
- exonware/xwnode/queries/executors/advanced/aggregate_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/ask_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/construct_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/describe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/for_loop_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/foreach_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/join_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/let_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/mutation_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/options_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/pipe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/subscribe_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/subscription_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/union_executor.py +0 -50
- exonware/xwnode/queries/executors/advanced/window_executor.py +0 -51
- exonware/xwnode/queries/executors/advanced/with_cte_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/__init__.py +0 -21
- exonware/xwnode/queries/executors/aggregation/avg_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/count_executor.py +0 -38
- exonware/xwnode/queries/executors/aggregation/distinct_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/group_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/having_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/max_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/min_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/sum_executor.py +0 -50
- exonware/xwnode/queries/executors/aggregation/summarize_executor.py +0 -50
- exonware/xwnode/queries/executors/array/__init__.py +0 -9
- exonware/xwnode/queries/executors/array/indexing_executor.py +0 -51
- exonware/xwnode/queries/executors/array/slicing_executor.py +0 -51
- exonware/xwnode/queries/executors/base.py +0 -257
- exonware/xwnode/queries/executors/capability_checker.py +0 -204
- exonware/xwnode/queries/executors/contracts.py +0 -166
- exonware/xwnode/queries/executors/core/__init__.py +0 -17
- exonware/xwnode/queries/executors/core/create_executor.py +0 -96
- exonware/xwnode/queries/executors/core/delete_executor.py +0 -99
- exonware/xwnode/queries/executors/core/drop_executor.py +0 -100
- exonware/xwnode/queries/executors/core/insert_executor.py +0 -39
- exonware/xwnode/queries/executors/core/select_executor.py +0 -152
- exonware/xwnode/queries/executors/core/update_executor.py +0 -102
- exonware/xwnode/queries/executors/data/__init__.py +0 -13
- exonware/xwnode/queries/executors/data/alter_executor.py +0 -50
- exonware/xwnode/queries/executors/data/load_executor.py +0 -50
- exonware/xwnode/queries/executors/data/merge_executor.py +0 -50
- exonware/xwnode/queries/executors/data/store_executor.py +0 -50
- exonware/xwnode/queries/executors/defs.py +0 -93
- exonware/xwnode/queries/executors/engine.py +0 -221
- exonware/xwnode/queries/executors/errors.py +0 -68
- exonware/xwnode/queries/executors/filtering/__init__.py +0 -25
- exonware/xwnode/queries/executors/filtering/between_executor.py +0 -80
- exonware/xwnode/queries/executors/filtering/filter_executor.py +0 -79
- exonware/xwnode/queries/executors/filtering/has_executor.py +0 -70
- exonware/xwnode/queries/executors/filtering/in_executor.py +0 -70
- exonware/xwnode/queries/executors/filtering/like_executor.py +0 -76
- exonware/xwnode/queries/executors/filtering/optional_executor.py +0 -76
- exonware/xwnode/queries/executors/filtering/range_executor.py +0 -80
- exonware/xwnode/queries/executors/filtering/term_executor.py +0 -77
- exonware/xwnode/queries/executors/filtering/values_executor.py +0 -71
- exonware/xwnode/queries/executors/filtering/where_executor.py +0 -44
- exonware/xwnode/queries/executors/graph/__init__.py +0 -15
- exonware/xwnode/queries/executors/graph/in_traverse_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/match_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/out_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/path_executor.py +0 -51
- exonware/xwnode/queries/executors/graph/return_executor.py +0 -51
- exonware/xwnode/queries/executors/ordering/__init__.py +0 -9
- exonware/xwnode/queries/executors/ordering/by_executor.py +0 -50
- exonware/xwnode/queries/executors/ordering/order_executor.py +0 -51
- exonware/xwnode/queries/executors/projection/__init__.py +0 -9
- exonware/xwnode/queries/executors/projection/extend_executor.py +0 -50
- exonware/xwnode/queries/executors/projection/project_executor.py +0 -50
- exonware/xwnode/queries/executors/registry.py +0 -173
- exonware/xwnode/queries/parsers/__init__.py +0 -26
- exonware/xwnode/queries/parsers/base.py +0 -86
- exonware/xwnode/queries/parsers/contracts.py +0 -46
- exonware/xwnode/queries/parsers/errors.py +0 -53
- exonware/xwnode/queries/parsers/sql_param_extractor.py +0 -318
- exonware/xwnode/queries/strategies/__init__.py +0 -24
- exonware/xwnode/queries/strategies/base.py +0 -236
- exonware/xwnode/queries/strategies/cql.py +0 -201
- exonware/xwnode/queries/strategies/cypher.py +0 -181
- exonware/xwnode/queries/strategies/datalog.py +0 -70
- exonware/xwnode/queries/strategies/elastic_dsl.py +0 -70
- exonware/xwnode/queries/strategies/eql.py +0 -70
- exonware/xwnode/queries/strategies/flux.py +0 -70
- exonware/xwnode/queries/strategies/gql.py +0 -70
- exonware/xwnode/queries/strategies/graphql.py +0 -240
- exonware/xwnode/queries/strategies/gremlin.py +0 -181
- exonware/xwnode/queries/strategies/hiveql.py +0 -214
- exonware/xwnode/queries/strategies/hql.py +0 -70
- exonware/xwnode/queries/strategies/jmespath.py +0 -219
- exonware/xwnode/queries/strategies/jq.py +0 -66
- exonware/xwnode/queries/strategies/json_query.py +0 -66
- exonware/xwnode/queries/strategies/jsoniq.py +0 -248
- exonware/xwnode/queries/strategies/kql.py +0 -70
- exonware/xwnode/queries/strategies/linq.py +0 -238
- exonware/xwnode/queries/strategies/logql.py +0 -70
- exonware/xwnode/queries/strategies/mql.py +0 -68
- exonware/xwnode/queries/strategies/n1ql.py +0 -210
- exonware/xwnode/queries/strategies/partiql.py +0 -70
- exonware/xwnode/queries/strategies/pig.py +0 -215
- exonware/xwnode/queries/strategies/promql.py +0 -70
- exonware/xwnode/queries/strategies/sparql.py +0 -220
- exonware/xwnode/queries/strategies/sql.py +0 -275
- exonware/xwnode/queries/strategies/xml_query.py +0 -66
- exonware/xwnode/queries/strategies/xpath.py +0 -223
- exonware/xwnode/queries/strategies/xquery.py +0 -258
- exonware/xwnode/queries/strategies/xwnode_executor.py +0 -332
- exonware/xwnode/queries/strategies/xwquery.py +0 -456
- exonware_xwnode-0.0.1.21.dist-info/RECORD +0 -214
- /exonware/xwnode/nodes/strategies/{node_ordered_map.py → ordered_map.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_ordered_map_balanced.py → ordered_map_balanced.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_patricia.py → patricia.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_radix_trie.py → radix_trie.py} +0 -0
- /exonware/xwnode/nodes/strategies/{node_set_tree.py → set_tree.py} +0 -0
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/WHEEL +0 -0
- {exonware_xwnode-0.0.1.21.dist-info → exonware_xwnode-0.0.1.23.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,525 @@
|
|
1
|
+
"""
|
2
|
+
#exonware/xwnode/src/exonware/xwnode/nodes/strategies/hopscotch_hash.py
|
3
|
+
|
4
|
+
Hopscotch Hashing Node Strategy Implementation
|
5
|
+
|
6
|
+
This module implements the HOPSCOTCH_HASH strategy for cache-friendly
|
7
|
+
open addressing with bounded neighborhood search.
|
8
|
+
|
9
|
+
Company: eXonware.com
|
10
|
+
Author: Eng. Muhammad AlShehri
|
11
|
+
Email: connect@exonware.com
|
12
|
+
Version: 0.0.1.23
|
13
|
+
Generation Date: 12-Oct-2025
|
14
|
+
"""
|
15
|
+
|
16
|
+
from typing import Any, Iterator, List, Dict, Optional, Tuple
|
17
|
+
from .base import ANodeTreeStrategy
|
18
|
+
from .contracts import NodeType
|
19
|
+
from ...defs import NodeMode, NodeTrait
|
20
|
+
from ...errors import XWNodeError, XWNodeValueError
|
21
|
+
|
22
|
+
|
23
|
+
class HopscotchEntry:
    """One slot of the hopscotch table.

    A slot plays two roles at once: it may store a key/value pair, and it
    carries the ``hop_info`` bitmap of the bucket at its own index.
    """

    def __init__(self, key: Any = None, value: Any = None):
        """Create a slot holding ``key``/``value`` with an empty bitmap.

        Args:
            key: Stored key; ``None`` marks the slot as unoccupied.
            value: Stored value.
        """
        self.key, self.value = key, value
        # Neighbourhood bitmap: the table sets bit ``i`` when the slot ``i``
        # positions after this one holds a key that hashes to this slot.
        self.hop_info = 0

    def is_empty(self) -> bool:
        """Return True when no key is stored in this slot."""
        occupied = self.key is not None
        return not occupied
|
35
|
+
|
36
|
+
|
37
|
+
class HopscotchHashStrategy(ANodeTreeStrategy):
|
38
|
+
"""
|
39
|
+
Hopscotch Hashing strategy for cache-friendly hash tables.
|
40
|
+
|
41
|
+
WHY Hopscotch Hashing:
|
42
|
+
- Better cache locality than cuckoo hashing
|
43
|
+
- Supports high load factors (>90%) efficiently
|
44
|
+
- Bounded search within neighborhood (H=32 typical)
|
45
|
+
- Predictable worst-case lookup time: O(H)
|
46
|
+
- Excellent for embedded systems and real-time applications
|
47
|
+
- Better resize behavior than linear probing
|
48
|
+
|
49
|
+
WHY this implementation:
|
50
|
+
- Hop bitmap (32-bit) enables fast neighborhood checking
|
51
|
+
- Linear displacement with bounded search maintains cache friendliness
|
52
|
+
- Power-of-2 table sizes enable fast modulo operations
|
53
|
+
- Lazy resizing balances memory and performance
|
54
|
+
- Neighborhood constant (H=32) fits in single cache line
|
55
|
+
|
56
|
+
Time Complexity:
|
57
|
+
- Insert: O(H) worst case where H is neighborhood size (32)
|
58
|
+
- Search: O(H) worst case, O(1) expected
|
59
|
+
- Delete: O(H) worst case, O(1) expected
|
60
|
+
- Resize: O(n) when load factor exceeded
|
61
|
+
|
62
|
+
Space Complexity: O(n / load_factor) typically O(1.1n) at 90% load
|
63
|
+
|
64
|
+
Trade-offs:
|
65
|
+
- Advantage: Better cache behavior than chaining or cuckoo
|
66
|
+
- Advantage: Predictable O(H) worst case (no unbounded probing)
|
67
|
+
- Advantage: High load factors (>90%) without degradation
|
68
|
+
- Limitation: Requires more complex insertion logic
|
69
|
+
- Limitation: Resize needed when neighborhood overfills
|
70
|
+
- Limitation: Slightly higher memory per entry (bitmap)
|
71
|
+
- Compared to HashMap (chaining): Better cache, more complex
|
72
|
+
- Compared to Cuckoo Hash: Better cache, simpler insertion
|
73
|
+
|
74
|
+
Best for:
|
75
|
+
- Cache-sensitive applications
|
76
|
+
- Embedded systems with memory constraints
|
77
|
+
- Real-time systems requiring bounded lookup times
|
78
|
+
- High load factor requirements (>85%)
|
79
|
+
- Frequent lookup operations
|
80
|
+
- Single-threaded environments
|
81
|
+
|
82
|
+
Not recommended for:
|
83
|
+
- Multi-threaded concurrent access (use lock-free alternatives)
|
84
|
+
- Extremely dynamic datasets (frequent resizes)
|
85
|
+
- When chaining simplicity is preferred
|
86
|
+
- Large value sizes (cache benefits diminish)
|
87
|
+
- Distributed hash tables
|
88
|
+
|
89
|
+
Following eXonware Priorities:
|
90
|
+
1. Security: Validates inputs, prevents hash collision attacks
|
91
|
+
2. Usability: Simple API matching standard dict, clear errors
|
92
|
+
3. Maintainability: Clear hop logic, well-documented neighborhoods
|
93
|
+
4. Performance: O(H) bounded time, cache-optimized
|
94
|
+
5. Extensibility: Easy to adjust H parameter, add probing strategies
|
95
|
+
|
96
|
+
Industry Best Practices:
|
97
|
+
- Follows Herlihy et al. hopscotch paper (2008)
|
98
|
+
- Uses H=32 for single cache line neighborhood
|
99
|
+
- Implements linear displacement with hop bitmap
|
100
|
+
- Provides automatic resizing at 90% load factor
|
101
|
+
- Supports dynamic table growth
|
102
|
+
"""
|
103
|
+
|
104
|
+
# Tree node type for classification
|
105
|
+
STRATEGY_TYPE: NodeType = NodeType.TREE
|
106
|
+
|
107
|
+
# Constants
|
108
|
+
DEFAULT_CAPACITY = 32
|
109
|
+
HOP_RANGE = 32 # Neighborhood size (must match bitmap width)
|
110
|
+
MAX_LOAD_FACTOR = 0.9
|
111
|
+
|
112
|
+
def __init__(self, mode: NodeMode = NodeMode.HOPSCOTCH_HASH,
             traits: NodeTrait = NodeTrait.NONE,
             initial_capacity: int = DEFAULT_CAPACITY, **options):
    """
    Set up an empty hopscotch table.

    Args:
        mode: Node mode forwarded to the base strategy
        traits: Node traits forwarded to the base strategy
        initial_capacity: Requested slot count; raised to at least
            HOP_RANGE and rounded up to a power of two
        **options: Extra options forwarded to the base strategy
    """
    super().__init__(mode, traits, **options)

    # The table never holds fewer slots than one neighbourhood, and the
    # slot count is a power of two so _hash can mask instead of modulo.
    requested = max(initial_capacity, self.HOP_RANGE)
    self.capacity = self._next_power_of_2(requested)
    self._size = 0
    self._table: List[HopscotchEntry] = [HopscotchEntry() for _ in range(self.capacity)]
|
130
|
+
|
131
|
+
def _next_power_of_2(self, n: int) -> int:
    """Return the smallest power of two that is >= n (1 when n <= 1)."""
    candidate = 1
    while candidate < n:
        candidate <<= 1
    return candidate
|
137
|
+
|
138
|
+
def get_supported_traits(self) -> NodeTrait:
    """Return the trait flags this strategy advertises."""
    supported = NodeTrait.INDEXED
    supported = supported | NodeTrait.FAST_INSERT
    supported = supported | NodeTrait.FAST_DELETE
    return supported
|
141
|
+
|
142
|
+
# ============================================================================
|
143
|
+
# CORE HASH OPERATIONS
|
144
|
+
# ============================================================================
|
145
|
+
|
146
|
+
def _hash(self, key: Any) -> int:
    """
    Map a key to its home bucket index.

    Args:
        key: Key to hash (must be hashable)

    Returns:
        Index in the range [0, capacity)

    Notes:
        - Python's built-in hash() is post-processed with two
          xor-shift/multiply rounds and a final xor-shift to spread
          the bits before the low bits are selected.
        - The final mask relies on capacity being a power of two.
        - The arithmetic runs on unbounded Python ints; intermediate
          values are not truncated to 32 bits.
    """
    mixed = hash(key)
    for shift, multiplier in ((16, 0x85ebca6b), (13, 0xc2b2ae35)):
        mixed = (mixed ^ (mixed >> shift)) * multiplier
    mixed ^= mixed >> 16
    # capacity - 1 is an all-ones mask, so the result is never negative.
    return mixed & (self.capacity - 1)
|
170
|
+
|
171
|
+
def put(self, key: Any, value: Any = None) -> None:
    """
    Insert or update a key-value pair.

    The key is stored within HOP_RANGE slots (cyclically) of its home
    bucket. If the nearest free slot is further away, occupied entries
    are hopped towards it until a slot inside the neighbourhood is
    freed; if that is impossible the table is resized and the insert
    is retried.

    Args:
        key: Key (must not be None; None marks an empty slot)
        value: Value

    Raises:
        XWNodeValueError: If key is None
    """
    if key is None:
        raise XWNodeValueError("Key cannot be None")

    hop_range = self.HOP_RANGE
    home = self._hash(key)

    # Overwrite in place when the key already lives in its neighbourhood.
    # This runs before the load-factor check so that a plain update never
    # triggers a resize.
    for step in range(hop_range):
        slot = self._table[(home + step) % self.capacity]
        if slot.key == key:
            slot.value = value
            return

    if self._size >= self.capacity * self.MAX_LOAD_FACTOR:
        self._resize()
        home = self._hash(key)  # capacity changed, so the home bucket did too

    free_idx = self._find_free_slot(home)
    if free_idx is None:
        # Table completely full; grow and start over.
        self._resize()
        self.put(key, value)
        return

    # All distances are cyclic: _find_free_slot wraps around the end of
    # the table, so free_idx may be numerically smaller than home.
    while (free_idx - home) % self.capacity >= hop_range:
        victim_idx = self._find_displacement_candidate(home, free_idx)
        if victim_idx is None:
            self._resize()
            self.put(key, value)
            return

        victim = self._table[victim_idx]
        victim_home = self._hash(victim.key)
        old_offset = (victim_idx - victim_home) % self.capacity
        new_offset = (free_idx - victim_home) % self.capacity
        if old_offset >= hop_range or new_offset >= hop_range:
            # The proposed move would push the victim outside its own
            # neighbourhood; growing the table is the safe fallback.
            self._resize()
            self.put(key, value)
            return

        # Move only the key/value payload. hop_info describes the bucket
        # at a given index, not the key stored there, so it must stay put.
        hole = self._table[free_idx]
        hole.key, hole.value = victim.key, victim.value
        victim.key = None
        victim.value = None

        # Re-point the victim's home bitmap from its old slot to the new one.
        home_bucket = self._table[victim_home]
        home_bucket.hop_info = (
            (home_bucket.hop_info & ~(1 << old_offset)) | (1 << new_offset)
        )

        free_idx = victim_idx

    target = self._table[free_idx]
    target.key = key
    target.value = value

    # Record the new entry in its home bucket's neighbourhood bitmap.
    self._table[home].hop_info |= 1 << ((free_idx - home) % self.capacity)
    self._size += 1
|
238
|
+
|
239
|
+
def _find_free_slot(self, start: int) -> Optional[int]:
    """
    Probe linearly (with wrap-around) for the first empty slot.

    Args:
        start: Index at which probing begins

    Returns:
        Index of the first empty slot at or after start, cyclically,
        or None when every slot is occupied
    """
    slot_count = self.capacity
    probe_order = ((start + step) % slot_count for step in range(slot_count))
    return next(
        (idx for idx in probe_order if self._table[idx].is_empty()),
        None,
    )
|
254
|
+
|
255
|
+
def _find_displacement_candidate(self, hash_idx: int, free_idx: int) -> Optional[int]:
    """
    Find an entry that can hop into free_idx, freeing an earlier slot.

    The HOP_RANGE - 1 slots preceding free_idx (cyclically) are scanned,
    farthest first. An occupied slot qualifies when free_idx is still
    inside the neighbourhood of that entry's own home bucket.

    Args:
        hash_idx: Home index of the key being inserted (not used by the
            search itself; kept for interface compatibility)
        free_idx: Index of the currently free slot

    Returns:
        Index of the entry to move into free_idx, or None if no entry
        in range may legally move there
    """
    slot_count = self.capacity
    hop_range = self.HOP_RANGE

    for back in range(hop_range - 1, 0, -1):
        candidate_idx = (free_idx - back) % slot_count
        entry = self._table[candidate_idx]
        if entry.is_empty():
            continue

        candidate_home = self._hash(entry.key)
        # Cyclic distance: a plain subtraction goes negative whenever the
        # candidate's home lies before the wrap point and free_idx after
        # it, which would wrongly accept any such candidate.
        if (free_idx - candidate_home) % slot_count < hop_range:
            return candidate_idx

    return None
|
278
|
+
|
279
|
+
def _resize(self) -> None:
    """
    Double the table and re-insert every stored pair.

    A fresh table of twice the capacity replaces the old one, the size
    counter is reset, and each occupied slot of the old table is fed
    back through put(), which rebuilds positions and bitmaps for the
    new capacity.
    """
    previous_slots = self._table

    self.capacity *= 2
    self._size = 0
    self._table = [HopscotchEntry() for _ in range(self.capacity)]

    for slot in previous_slots:
        if slot.is_empty():
            continue
        self.put(slot.key, slot.value)
|
299
|
+
|
300
|
+
def get(self, key: Any, default: Any = None) -> Any:
    """
    Look up the value stored for a key.

    Only slots flagged in the home bucket's hop bitmap are inspected.

    Args:
        key: Key to look up
        default: Returned when key is None or not present

    Returns:
        Stored value, or default
    """
    if key is None:
        return default

    home = self._hash(key)
    bitmap = self._table[home].hop_info

    for offset in range(self.HOP_RANGE):
        if not (bitmap >> offset) & 1:
            continue
        slot = self._table[(home + offset) % self.capacity]
        if slot.key == key:
            return slot.value

    return default
|
325
|
+
|
326
|
+
def has(self, key: Any) -> bool:
    """Return True when key is stored in a slot flagged by its home bitmap."""
    if key is None:
        return False

    home = self._hash(key)
    bitmap = self._table[home].hop_info

    return any(
        self._table[(home + offset) % self.capacity].key == key
        for offset in range(self.HOP_RANGE)
        if bitmap & (1 << offset)
    )
|
342
|
+
|
343
|
+
def delete(self, key: Any) -> bool:
    """
    Remove a key-value pair.

    Args:
        key: Key to remove

    Returns:
        True if deleted, False if key is None or not found
    """
    if key is None:
        return False

    home = self._hash(key)
    home_bucket = self._table[home]
    bitmap = home_bucket.hop_info

    for offset in range(self.HOP_RANGE):
        if not bitmap & (1 << offset):
            continue
        slot = self._table[(home + offset) % self.capacity]
        if slot.key == key:
            # Clear only the payload. The slot object also carries the
            # hop bitmap of the bucket at its index; replacing the whole
            # object would orphan every other key homed at that bucket.
            slot.key = None
            slot.value = None

            home_bucket.hop_info &= ~(1 << offset)
            self._size -= 1
            return True

    return False
|
374
|
+
|
375
|
+
def keys(self) -> Iterator[Any]:
    """Yield every stored key, in table (slot) order."""
    yield from (slot.key for slot in self._table if not slot.is_empty())
|
380
|
+
|
381
|
+
def values(self) -> Iterator[Any]:
    """Yield every stored value, in table (slot) order."""
    yield from (slot.value for slot in self._table if not slot.is_empty())
|
386
|
+
|
387
|
+
def items(self) -> Iterator[tuple[Any, Any]]:
    """Yield (key, value) tuples for every occupied slot, in table order."""
    yield from (
        (slot.key, slot.value)
        for slot in self._table
        if not slot.is_empty()
    )
|
392
|
+
|
393
|
+
def __len__(self) -> int:
    """Return how many key-value pairs are currently stored."""
    stored = self._size
    return stored
|
396
|
+
|
397
|
+
def to_native(self) -> Any:
    """Return the stored pairs as a plain dict built from items()."""
    return {stored_key: stored_value for stored_key, stored_value in self.items()}
|
400
|
+
|
401
|
+
# ============================================================================
|
402
|
+
# PERFORMANCE METHODS
|
403
|
+
# ============================================================================
|
404
|
+
|
405
|
+
def get_load_factor(self) -> float:
    """
    Report how full the table is.

    Returns:
        Stored entries divided by capacity, or 0.0 when capacity
        is not positive
    """
    if self.capacity > 0:
        return self._size / self.capacity
    return 0.0
|
413
|
+
|
414
|
+
def get_statistics(self) -> Dict[str, Any]:
    """
    Summarise table occupancy and hop-bitmap usage.

    Returns:
        Dict with size, capacity, load factor, hop range, and the
        average / maximum number of bits set in hop_info. Only slots
        that currently hold a key contribute to the bitmap figures.
    """
    bits_per_slot = [
        bin(slot.hop_info).count('1')
        for slot in self._table
        if not slot.is_empty()
    ]

    if bits_per_slot:
        mean_bits = sum(bits_per_slot) / len(bits_per_slot)
        peak_bits = max(bits_per_slot)
    else:
        mean_bits = 0
        peak_bits = 0

    stats: Dict[str, Any] = {}
    stats['size'] = self._size
    stats['capacity'] = self.capacity
    stats['load_factor'] = self.get_load_factor()
    stats['hop_range'] = self.HOP_RANGE
    stats['avg_neighborhood_size'] = mean_bits
    stats['max_neighborhood_size'] = peak_bits
    return stats
|
438
|
+
|
439
|
+
# ============================================================================
|
440
|
+
# UTILITY METHODS
|
441
|
+
# ============================================================================
|
442
|
+
|
443
|
+
def clear(self) -> None:
    """Drop every entry, keeping the current capacity."""
    self._size = 0
    # A brand-new slot list also resets every hop bitmap to zero.
    self._table = [HopscotchEntry() for _ in range(self.capacity)]
|
447
|
+
|
448
|
+
def is_empty(self) -> bool:
    """
    Tell whether the table holds no elements.

    Returns:
        True when the tracked element count (``self._size``) equals zero.
    """
    has_no_entries = self._size == 0
    return has_no_entries
|
451
|
+
|
452
|
+
def size(self) -> int:
    """
    Report how many elements are stored.

    Returns:
        The tracked element count (``self._size``).
    """
    element_count = self._size
    return element_count
|
455
|
+
|
456
|
+
def get_mode(self) -> NodeMode:
    """
    Return the strategy mode.

    Returns:
        The value stored in ``self.mode``.
    """
    current_mode = self.mode
    return current_mode
|
459
|
+
|
460
|
+
def get_traits(self) -> NodeTrait:
    """
    Return the strategy traits.

    Returns:
        The value stored in ``self.traits``.
    """
    current_traits = self.traits
    return current_traits
|
463
|
+
|
464
|
+
# ============================================================================
|
465
|
+
# COMPATIBILITY METHODS
|
466
|
+
# ============================================================================
|
467
|
+
|
468
|
+
def find(self, key: Any) -> Optional[Any]:
    """
    Look up a value by key (compatibility alias for ``get``).

    Args:
        key: Key to look up.

    Returns:
        Whatever ``self.get(key)`` returns.
    """
    found = self.get(key)
    return found
|
471
|
+
|
472
|
+
def insert(self, key: Any, value: Any = None) -> None:
    """
    Store a key-value pair (compatibility alias for ``put``).

    Args:
        key: Key to store.
        value: Value to associate with the key; defaults to None.
    """
    self.put(key, value)
    return None
|
475
|
+
|
476
|
+
def __str__(self) -> str:
    """
    Build a short human-readable summary.

    Returns:
        Text showing the element count, the capacity and the load
        factor rendered as a percentage with one decimal place.
    """
    entries = self._size
    slots = self.capacity
    # Same rendering as the ':.1%' format spec inside an f-string.
    load_text = format(self.get_load_factor(), ".1%")
    return f"HopscotchHashStrategy(size={entries}, capacity={slots}, load={load_text})"
|
480
|
+
|
481
|
+
def __repr__(self) -> str:
    """
    Build a detailed representation.

    Returns:
        Text showing the mode's ``name``, the element count and the
        traits.
    """
    mode_name = self.mode.name
    entries = self._size
    trait_flags = self.traits
    return f"HopscotchHashStrategy(mode={mode_name}, size={entries}, traits={trait_flags})"
|
484
|
+
|
485
|
+
# ============================================================================
|
486
|
+
# FACTORY METHOD
|
487
|
+
# ============================================================================
|
488
|
+
|
489
|
+
@classmethod
def create_from_data(cls, data: Any, initial_capacity: int = DEFAULT_CAPACITY) -> 'HopscotchHashStrategy':
    """
    Build a populated hopscotch hash from existing data.

    Args:
        data: A dict (stored as key -> value), a list or tuple (stored
            as index -> element), or any other object (stored as the
            single entry under key 0).
        initial_capacity: Lower bound for the table size.

    Returns:
        New instance of ``cls`` holding the data.
    """
    # Classify the input once; both the sizing and the fill step use it.
    is_mapping = isinstance(data, dict)
    is_sequence = not is_mapping and isinstance(data, (list, tuple))

    # Any other kind of input becomes exactly one entry.
    expected_entries = len(data) if (is_mapping or is_sequence) else 1

    # Size for the maximum load factor, add one hop range, and never go
    # below the requested capacity.
    sized_for_load = int(expected_entries / cls.MAX_LOAD_FACTOR) + cls.HOP_RANGE
    instance = cls(initial_capacity=max(sized_for_load, initial_capacity))

    if is_mapping:
        pairs = data.items()
    elif is_sequence:
        pairs = enumerate(data)
    else:
        pairs = [(0, data)]

    for key, value in pairs:
        instance.put(key, value)

    return instance
|
525
|
+
|
@@ -8,11 +8,12 @@ cardinality estimation with logarithmic space complexity.
|
|
8
8
|
from typing import Any, Iterator, List, Dict, Optional, Set
|
9
9
|
import hashlib
|
10
10
|
import math
|
11
|
-
from .
|
11
|
+
from .base import ANodeStrategy
|
12
12
|
from ...defs import NodeMode, NodeTrait
|
13
|
+
from .contracts import NodeType
|
13
14
|
|
14
15
|
|
15
|
-
class
|
16
|
+
class HyperLogLogStrategy(ANodeStrategy):
|
16
17
|
"""
|
17
18
|
HyperLogLog node strategy for cardinality estimation.
|
18
19
|
|
@@ -21,7 +22,7 @@ class xHyperLogLogStrategy(aNodeStrategy):
|
|
21
22
|
"""
|
22
23
|
|
23
24
|
# Strategy type classification
|
24
|
-
STRATEGY_TYPE = NodeType.
|
25
|
+
STRATEGY_TYPE = NodeType.HYBRID # Probabilistic cardinality estimation with hash buckets
|
25
26
|
|
26
27
|
|
27
28
|
def __init__(self, traits: NodeTrait = NodeTrait.NONE, **options):
|
@@ -254,8 +255,8 @@ class xHyperLogLogStrategy(aNodeStrategy):
|
|
254
255
|
raise ValueError("Cannot merge HyperLogLogs with different precision")
|
255
256
|
|
256
257
|
# Create new HyperLogLog
|
257
|
-
merged =
|
258
|
-
traits=self.
|
258
|
+
merged = HyperLogLogStrategy(
|
259
|
+
traits=self.traits,
|
259
260
|
precision=self.precision
|
260
261
|
)
|
261
262
|
|