swiss_hash 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +106 -61
- data/ext/swiss_hash/swiss_hash.c +59 -30
- data/lib/swiss_hash/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 15c6c585465ffcd0e8a399df625655da292ec1c619e6283eb86cd4e27ac9c682
|
|
4
|
+
data.tar.gz: e9b0e51c0e8a8e7279ba1ccbe32f5c3e9fa8fddeae5ea55328d9f149acbabf08
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2587c907c6d77be5049d0b08edfa8f8c6c03b0503eb380e4577cf64dc722370e9fb1841778f55d86869d8dcc6d61517cfe8a199521eb1e5926783fd3d1e8deda
|
|
7
|
+
data.tar.gz: 9222dcf296f098faa0774e9b54b67aab36e40efdf5576049eac2d79880d1b61f55f27eb39239e71996c41a4d239d537e9cf42e2f1154c9c9ea2026901f77c302
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# SwissHash
|
|
2
2
|
|
|
3
|
-
Swiss Table hash map implementation as a Ruby C extension. Based on Go 1.24 Swiss
|
|
3
|
+
Swiss Table hash map implementation as a Ruby C extension. Based on the design principles from Google's [Abseil](https://abseil.io/about/design/swisstables) flat_hash_map, Rust's [hashbrown](https://github.com/rust-lang/hashbrown), and [Go 1.24 Swiss Tables](https://go.dev/blog/swisstable), with architecture adapted for Ruby's object system.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -22,33 +22,63 @@ h.stats # => { capacity: 16, size: 0, ... }
|
|
|
22
22
|
|
|
23
23
|
## Performance Results
|
|
24
24
|
|
|
25
|
-
Benchmarks on Ruby 3.1.7 / arm64-darwin24
|
|
25
|
+
Benchmarks on Ruby 3.1.7 / arm64-darwin24 (Apple Silicon, NEON SIMD).
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
- **String keys**: SwissHash is **18.9-28.6% faster** across all dataset sizes
|
|
29
|
-
- **Sequential integers**: 24.6% slower (1k), 3.7% slower (10k), **11.5% faster** (100k)
|
|
30
|
-
- **Random integers**: 11.6% slower (1k), 4.7% slower (10k), **6.3% faster** (100k)
|
|
27
|
+
Methodology: 21 iterations per test, 5 warmup runs, IQR-filtered mean, **interleaved Ruby/SwissHash measurements** per iteration with alternating start order to cancel out thermal drift and scheduling noise. Per-side coefficient of variation reported to distinguish real deltas from noise.
|
|
31
28
|
|
|
32
|
-
###
|
|
33
|
-
- **Sequential integers**: SwissHash is **7.3-10.6% slower** across all sizes
|
|
34
|
-
- **String keys**: SwissHash is **13.0-25.2% slower** across all sizes
|
|
29
|
+
### N = 100,000
|
|
35
30
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
| Operation | Ruby Hash | SwissHash | Delta |
|
|
32
|
+
|---|---|---|---|
|
|
33
|
+
| Insert (string keys) | 17.6 ms | 11.3 ms | **−35.8%** ⚡ |
|
|
34
|
+
| Delete + reinsert 25% | 9.8 ms | 8.9 ms | **−9.0%** |
|
|
35
|
+
| Insert (sequential int) | 7.0 ms | 6.4 ms | **−8.7%** |
|
|
36
|
+
| Mixed (70% read / 20% write / 10% delete) | 21.4 ms | 19.6 ms | **−8.6%** |
|
|
37
|
+
| Insert (random int) | 6.7 ms | 6.4 ms | **−3.5%** |
|
|
38
|
+
| Lookup (string keys) | 20.3 ms | 20.8 ms | +2.5% |
|
|
39
|
+
| Lookup (sequential int) | 11.7 ms | 12.2 ms | +4.4% |
|
|
40
|
+
|
|
41
|
+
### N = 10,000
|
|
42
|
+
|
|
43
|
+
| Operation | Ruby Hash | SwissHash | Delta |
|
|
44
|
+
|---|---|---|---|
|
|
45
|
+
| Insert (string keys) | 1.63 ms | 1.10 ms | **−32.8%** ⚡ |
|
|
46
|
+
| **Lookup (string keys)** | 1.71 ms | 1.62 ms | **−5.2%** ⚡ |
|
|
47
|
+
| Mixed | 1.93 ms | 1.90 ms | −1.6% |
|
|
48
|
+
| Delete + reinsert | 0.91 ms | 0.89 ms | −2.0% |
|
|
49
|
+
| Insert (sequential int) | 0.66 ms | 0.67 ms | +2.5% |
|
|
50
|
+
| Lookup (sequential int) | 1.05 ms | 1.12 ms | +6.0% |
|
|
51
|
+
|
|
52
|
+
### N = 1,000
|
|
53
|
+
|
|
54
|
+
Ruby Hash uses an AR-table (flat array, linear search) for small hashes — SwissHash doesn't have this small-map regime, so for very small integer-keyed workloads Ruby wins. String workloads still favour SwissHash due to wyhash and the lookup fast path.
|
|
55
|
+
|
|
56
|
+
| Operation | Ruby Hash | SwissHash | Delta |
|
|
57
|
+
|---|---|---|---|
|
|
58
|
+
| Insert (string keys) | 0.183 ms | 0.118 ms | **−35.6%** ⚡ |
|
|
59
|
+
| **Lookup (string keys)** | 0.184 ms | 0.155 ms | **−15.5%** ⚡ |
|
|
60
|
+
| Insert (sequential int) | 0.063 ms | 0.074 ms | +17.5% |
|
|
61
|
+
| Delete + reinsert | 0.094 ms | 0.102 ms | +8.2% |
|
|
62
|
+
|
|
63
|
+
### Summary
|
|
64
|
+
|
|
65
|
+
- **Faster on 5 of 7 operations** at N=100k, some substantially (−36% string insert, −9% mixed workload, −9% delete+reinsert).
|
|
66
|
+
- **Faster string-key inserts at every size** (33–36% faster in the tables above); string-key lookups range from roughly break-even at N=100k (+2.5%) to 15% faster at N=1k.
|
|
67
|
+
- **Near parity on lookups** at N=100k (+2.5% on strings, +4.4% on ints) — the remaining gap stems from the Ruby VM's opcode specialization for `Hash#[]`, not from the data structure.
|
|
39
68
|
|
|
40
69
|
### Memory Usage
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
- **
|
|
44
|
-
- **
|
|
45
|
-
- **
|
|
70
|
+
|
|
71
|
+
For 100,000 integer keys:
|
|
72
|
+
- **SwissHash**: 2,176 KB contiguous native memory, 4 GC slots
|
|
73
|
+
- **Ruby Hash**: managed via GC slots (not directly measurable)
|
|
74
|
+
- **Load factor**: 76.3% actual (max 87.5%)
|
|
75
|
+
- **GC pressure**: zero GC runs during insertion
|
|
46
76
|
|
|
47
77
|
## Features
|
|
48
78
|
|
|
49
|
-
- **SIMD
|
|
79
|
+
- **SIMD-optimized probing**: SSE2 (16-byte groups) on x86_64, NEON (8-byte groups) on ARM64, SWAR fallback elsewhere
|
|
50
80
|
- **Memory efficient**: Swiss Table layout with 87.5% max load factor
|
|
51
|
-
- **Tombstone compaction**: Automatic cleanup of deleted entries
|
|
81
|
+
- **Tombstone compaction**: Automatic cleanup of deleted entries during resize
|
|
52
82
|
- **Ruby compatibility**: Supports frozen string keys, all Ruby object types
|
|
53
83
|
- **Thread safety**: Prevents reentrant modifications during callbacks
|
|
54
84
|
|
|
@@ -59,8 +89,8 @@ hash = SwissHash::Hash.new(capacity = 16)
|
|
|
59
89
|
|
|
60
90
|
# Basic operations
|
|
61
91
|
hash[key] = value
|
|
62
|
-
hash[key] # get
|
|
63
|
-
hash.delete(key) #
|
|
92
|
+
hash[key] # get, returns nil if absent
|
|
93
|
+
hash.delete(key) # returns old value or nil
|
|
64
94
|
|
|
65
95
|
# Enumeration
|
|
66
96
|
hash.each { |k, v| ... }
|
|
@@ -68,51 +98,50 @@ hash.keys
|
|
|
68
98
|
hash.values
|
|
69
99
|
|
|
70
100
|
# Size and status
|
|
71
|
-
hash.size
|
|
101
|
+
hash.size # also: length
|
|
72
102
|
hash.empty?
|
|
73
|
-
hash.key?(key)
|
|
103
|
+
hash.key?(key) # also: has_key?, include?
|
|
74
104
|
|
|
75
105
|
# Maintenance
|
|
76
106
|
hash.clear
|
|
77
|
-
hash.compact! #
|
|
107
|
+
hash.compact! # drop tombstones without growing the table (rebuilds at the same capacity)
|
|
78
108
|
|
|
79
109
|
# Debugging
|
|
80
|
-
hash.stats # =>
|
|
110
|
+
hash.stats # => { capacity:, size:, num_groups:, load_factor:,
|
|
111
|
+
# memory_bytes:, growth_left:, tombstones:, simd: }
|
|
81
112
|
```
|
|
82
113
|
|
|
83
114
|
## Usage Recommendations
|
|
84
115
|
|
|
85
|
-
SwissHash
|
|
86
|
-
-
|
|
87
|
-
-
|
|
88
|
-
- **
|
|
89
|
-
-
|
|
116
|
+
Use SwissHash when:
|
|
117
|
+
- Your hash keys are **strings** — inserts are 33–36% faster in the benchmarks above, and lookups are roughly on par or faster
|
|
118
|
+
- Your hash holds **10,000+ entries** with any mix of reads, writes, and deletes
|
|
119
|
+
- You do **heavy delete/reinsert churn** — tombstone compaction handles it without pathological slowdown
|
|
120
|
+
- You need **predictable native memory** instead of scattered GC allocations
|
|
90
121
|
|
|
91
|
-
|
|
122
|
+
Stick with Ruby's built-in `Hash` when:
|
|
123
|
+
- Your hash is small (≤ a few hundred entries) and mostly lookup-heavy with integer keys — Ruby's AR-table wins for small integer-keyed workloads
|
|
124
|
+
- You depend on Hash-specific semantics: default blocks, `compare_by_identity`, full insertion-order guarantees, or the complete `Hash` API
|
|
92
125
|
|
|
93
|
-
##
|
|
126
|
+
## Architecture
|
|
94
127
|
|
|
95
|
-
|
|
128
|
+
### Swiss Table core
|
|
129
|
+
- **Open addressing** with a one-byte control entry per slot holding a 7-bit `H2` tag; SIMD rejects non-matching slots in parallel
|
|
130
|
+
- **Group size 16 on SSE2** (full `_mm_movemask_epi8` width, matching Abseil / hashbrown); **group size 8 on NEON** and portable SWAR fallback — matches hashbrown's deliberate ARM choice (NEON's multi-cycle movemask latency makes 16-wide groups lose to 8-wide SWAR)
|
|
131
|
+
- **Triangular probing** — `i(i+1)/2` — guarantees full coverage on power-of-2 capacities
|
|
132
|
+
- **Max load factor 87.5%** (7/8)
|
|
96
133
|
|
|
97
|
-
### Ruby
|
|
98
|
-
- **
|
|
99
|
-
- **
|
|
100
|
-
- **
|
|
101
|
-
- **
|
|
134
|
+
### Ruby-specific adaptations
|
|
135
|
+
- **wyhash** for string keys — faster than Ruby's SipHash on short strings, which dominate typical workloads
|
|
136
|
+
- **Fibonacci multiplicative hash** for Fixnum and Symbol keys — their low bits are already well-distributed, so avalanche mixers would be wasted work
|
|
137
|
+
- **ASCII-7bit fast-path** in key equality: frozen string keys have their coderange pre-computed on insert, so subsequent lookup comparisons skip `rb_enc_compatible` entirely and go straight to `memcmp`
|
|
138
|
+
- **Encoding-index equality check** as the first fast path in key comparison — avoids `rb_enc_compatible` on the common case of matching encodings
|
|
139
|
+
- **Inline `RTYPEDDATA_DATA`** on hot methods (`[]`, `[]=`, `delete`, `key?`) — skips the type-check overhead of `TypedData_Get_Struct` on every operation
|
|
140
|
+
- **Prefetch `slots[off]`** right after the control-byte load so DRAM fetch overlaps with SIMD match extraction
|
|
102
141
|
|
|
103
|
-
###
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
- **Two-level lookup**: Swiss Table's H1/H2 split requires additional bit manipulation and SIMD operations
|
|
107
|
-
- **SIMD overhead**: NEON/SSE2 operations have setup costs that don't pay off for small group scans
|
|
108
|
-
- **Additional equality checks**: `keys_equal()` function adds extra indirection compared to VM's direct comparison
|
|
109
|
-
|
|
110
|
-
### Architecture Trade-offs
|
|
111
|
-
- **Swiss Table design**: Optimized for high load factors and cache efficiency, but adds complexity to the critical lookup path
|
|
112
|
-
- **Group-based probing**: While theoretically faster, the overhead of SIMD operations and multiple memory accesses hurts performance for typical Ruby workloads
|
|
113
|
-
- **Memory indirection**: SwissHash's separate control bytes array creates additional memory accesses vs Ruby Hash's integrated approach
|
|
114
|
-
|
|
115
|
-
This explains why SwissHash excels at write-heavy operations (where its design advantages matter) but loses to Ruby's VM-optimized lookups.
|
|
142
|
+
### Memory layout
|
|
143
|
+
- Separate control-byte array and slot array (hashbrown-style) — the tight control array scans well through L1/L2
|
|
144
|
+
- No zero-initialization of slot memory (`malloc` instead of `calloc`) — slots are only ever read after their control byte confirms they're live
|
|
116
145
|
|
|
117
146
|
## Build
|
|
118
147
|
|
|
@@ -120,22 +149,38 @@ This explains why SwissHash excels at write-heavy operations (where its design a
|
|
|
120
149
|
rake compile
|
|
121
150
|
```
|
|
122
151
|
|
|
123
|
-
##
|
|
152
|
+
## Benchmarking
|
|
153
|
+
|
|
154
|
+
The included `benchmark.rb` produces statistically honest results:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
bundle exec ruby benchmark.rb
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Key features that make it trustworthy:
|
|
161
|
+
- **Interleaved Ruby/SwissHash measurements** per iteration with alternating start order — thermal throttling and fluctuating background load hit both sides equally
|
|
162
|
+
- **21 iterations with IQR-filtered mean** (trims top and bottom 25%) — more robust than median on noisy laptop hardware
|
|
163
|
+
- **5 warmup runs** to settle JIT, caches, and branch predictor
|
|
164
|
+
- **Per-side coefficient of variation** (`±X.X%`) displayed so you can distinguish a real 5% delta from 5% noise
|
|
165
|
+
- **Correctness smoke test** runs before measurement
|
|
166
|
+
|
|
167
|
+
### Profiling
|
|
124
168
|
|
|
125
|
-
|
|
169
|
+
For profiling on macOS:
|
|
126
170
|
|
|
127
171
|
```bash
|
|
128
|
-
ruby
|
|
172
|
+
bundle exec ruby simp.rb # runs infinite lookup loop, prints PID
|
|
173
|
+
sample <PID> 60 -f /tmp/swiss.sample
|
|
174
|
+
filtercalltree /tmp/swiss.sample | head -100
|
|
129
175
|
```
|
|
130
176
|
|
|
131
|
-
|
|
132
|
-
- Insert performance (sequential int, strings, random int)
|
|
133
|
-
- Lookup performance (with 3x repetition for signal amplification)
|
|
134
|
-
- Delete with reinsertion operations
|
|
135
|
-
- Mixed workloads (70/20/10 read/write/delete)
|
|
136
|
-
- Memory usage and GC pressure analysis
|
|
177
|
+
## Design References
|
|
137
178
|
|
|
138
|
-
|
|
179
|
+
- Matt Kulukundis, ["Designing a Fast, Efficient, Cache-friendly Hash Table, Step by Step"](https://www.youtube.com/watch?v=ncHmEUmJZf4) — CppCon 2017
|
|
180
|
+
- [Abseil: SwissTables design](https://abseil.io/about/design/swisstables)
|
|
181
|
+
- [rust-lang/hashbrown](https://github.com/rust-lang/hashbrown) — reference for SSE2/NEON/SWAR strategy choices
|
|
182
|
+
- [Go 1.24 maps](https://go.dev/blog/swisstable) — probing and resize design trade-offs
|
|
183
|
+
- Aria Beingessner, ["Swisstable, a Quick and Dirty Description"](https://faultlore.com/blah/hashbrown-tldr/) — implementer's notes
|
|
139
184
|
|
|
140
185
|
## License
|
|
141
186
|
|
data/ext/swiss_hash/swiss_hash.c
CHANGED
|
@@ -12,6 +12,12 @@
|
|
|
12
12
|
#endif
|
|
13
13
|
#endif
|
|
14
14
|
|
|
15
|
+
#if defined(__GNUC__) || defined(__clang__)
|
|
16
|
+
#define SH_PREFETCH(p) __builtin_prefetch((const void *)(p), 0, 1)
|
|
17
|
+
#else
|
|
18
|
+
#define SH_PREFETCH(p) ((void)0)
|
|
19
|
+
#endif
|
|
20
|
+
|
|
15
21
|
static uint64_t swiss_hash_seed0;
|
|
16
22
|
static uint64_t swiss_hash_seed1;
|
|
17
23
|
|
|
@@ -115,7 +121,6 @@ static inline uint64_t wyhash(const void *data, size_t len, uint64_t seed) {
|
|
|
115
121
|
return _wymix(s1 ^ len, _wymix(a ^ s1, b ^ seed));
|
|
116
122
|
}
|
|
117
123
|
|
|
118
|
-
#define GROUP_SIZE 8
|
|
119
124
|
#define CTRL_EMPTY 0x80
|
|
120
125
|
#define CTRL_DELETED 0xFE
|
|
121
126
|
#define H2_MASK 0x7F
|
|
@@ -126,31 +131,39 @@ static inline uint64_t wyhash(const void *data, size_t len, uint64_t seed) {
|
|
|
126
131
|
#if defined(__x86_64__) || defined(_M_X64)
|
|
127
132
|
#define SWISS_USE_SSE2 1
|
|
128
133
|
#include <emmintrin.h>
|
|
134
|
+
|
|
135
|
+
#define GROUP_SIZE 16
|
|
136
|
+
#define GROUP_MASK 0xFFFFu
|
|
129
137
|
#elif defined(__aarch64__) || defined(_M_ARM64)
|
|
130
138
|
#define SWISS_USE_NEON 1
|
|
131
139
|
#include <arm_neon.h>
|
|
140
|
+
|
|
141
|
+
#define GROUP_SIZE 8
|
|
142
|
+
#define GROUP_MASK 0xFFu
|
|
132
143
|
#else
|
|
133
144
|
#define SWISS_USE_PORTABLE 1
|
|
145
|
+
#define GROUP_SIZE 8
|
|
146
|
+
#define GROUP_MASK 0xFFu
|
|
134
147
|
#endif
|
|
135
148
|
|
|
136
149
|
#ifdef SWISS_USE_SSE2
|
|
137
150
|
|
|
138
151
|
static inline __m128i ctrl_load(const uint8_t *ctrl) {
|
|
139
|
-
return
|
|
152
|
+
return _mm_loadu_si128((const __m128i *)ctrl);
|
|
140
153
|
}
|
|
141
154
|
|
|
142
155
|
static inline uint32_t ctrl_match_h2_vec(__m128i cv, uint8_t h2) {
|
|
143
156
|
__m128i cmp = _mm_cmpeq_epi8(cv, _mm_set1_epi8((char)h2));
|
|
144
|
-
return (uint32_t)_mm_movemask_epi8(cmp)
|
|
157
|
+
return (uint32_t)_mm_movemask_epi8(cmp);
|
|
145
158
|
}
|
|
146
159
|
|
|
147
160
|
static inline uint32_t ctrl_match_empty_vec(__m128i cv) {
|
|
148
161
|
__m128i cmp = _mm_cmpeq_epi8(cv, _mm_set1_epi8((char)CTRL_EMPTY));
|
|
149
|
-
return (uint32_t)_mm_movemask_epi8(cmp)
|
|
162
|
+
return (uint32_t)_mm_movemask_epi8(cmp);
|
|
150
163
|
}
|
|
151
164
|
|
|
152
165
|
static inline uint32_t ctrl_match_empty_or_deleted_vec(__m128i cv) {
|
|
153
|
-
return (uint32_t)_mm_movemask_epi8(cv)
|
|
166
|
+
return (uint32_t)_mm_movemask_epi8(cv);
|
|
154
167
|
}
|
|
155
168
|
|
|
156
169
|
static inline uint32_t ctrl_match_empty(const uint8_t *ctrl) {
|
|
@@ -296,7 +309,7 @@ static inline uint64_t compute_hash(VALUE key) {
|
|
|
296
309
|
if (RB_TYPE_P(key, T_STRING)) {
|
|
297
310
|
const char *ptr = RSTRING_PTR(key);
|
|
298
311
|
long len = RSTRING_LEN(key);
|
|
299
|
-
int enc_idx =
|
|
312
|
+
int enc_idx = ENCODING_GET(key);
|
|
300
313
|
uint64_t str_seed = swiss_hash_seed0 ^ (uint64_t)enc_idx;
|
|
301
314
|
return wyhash(ptr ? ptr : (const char *)"", (size_t)len, str_seed);
|
|
302
315
|
}
|
|
@@ -315,21 +328,29 @@ static inline uint64_t compute_hash(VALUE key) {
|
|
|
315
328
|
#define H2(hash) ((uint8_t)((hash) & H2_MASK))
|
|
316
329
|
|
|
317
330
|
static inline int keys_equal(VALUE a, VALUE b) {
|
|
318
|
-
if (a == b)
|
|
319
|
-
|
|
331
|
+
if (a == b) return 1;
|
|
332
|
+
if (FIXNUM_P(a) || SYMBOL_P(a) || SPECIAL_CONST_P(a)) return 0;
|
|
320
333
|
|
|
321
|
-
|
|
322
|
-
if (ta == T_FIXNUM || ta == T_SYMBOL)
|
|
323
|
-
return 0;
|
|
324
|
-
|
|
325
|
-
if (ta == T_STRING && RB_TYPE_P(b, T_STRING)) {
|
|
334
|
+
if (RB_TYPE_P(a, T_STRING) && RB_TYPE_P(b, T_STRING)) {
|
|
326
335
|
long la = RSTRING_LEN(a);
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
336
|
+
if (la != RSTRING_LEN(b)) return 0;
|
|
337
|
+
const char *pa = RSTRING_PTR(a);
|
|
338
|
+
const char *pb = RSTRING_PTR(b);
|
|
339
|
+
if (pa == pb) return 1;
|
|
340
|
+
|
|
341
|
+
int ea = ENCODING_GET(a);
|
|
342
|
+
int eb = ENCODING_GET(b);
|
|
343
|
+
if (ea == eb) {
|
|
344
|
+
return memcmp(pa, pb, (size_t)la) == 0;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
if (ENC_CODERANGE(a) == ENC_CODERANGE_7BIT &&
|
|
348
|
+
ENC_CODERANGE(b) == ENC_CODERANGE_7BIT) {
|
|
349
|
+
return memcmp(pa, pb, (size_t)la) == 0;
|
|
350
|
+
}
|
|
330
351
|
|
|
331
352
|
if (rb_enc_compatible(a, b)) {
|
|
332
|
-
return memcmp(
|
|
353
|
+
return memcmp(pa, pb, (size_t)la) == 0;
|
|
333
354
|
}
|
|
334
355
|
return rb_eql(a, b);
|
|
335
356
|
}
|
|
@@ -338,8 +359,12 @@ static inline int keys_equal(VALUE a, VALUE b) {
|
|
|
338
359
|
}
|
|
339
360
|
|
|
340
361
|
static inline VALUE prepare_key(VALUE key) {
|
|
341
|
-
if (RB_TYPE_P(key, T_STRING)
|
|
342
|
-
|
|
362
|
+
if (RB_TYPE_P(key, T_STRING)) {
|
|
363
|
+
if (!OBJ_FROZEN(key)) {
|
|
364
|
+
key = rb_str_new_frozen(key);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
rb_enc_str_coderange(key);
|
|
343
368
|
}
|
|
344
369
|
return key;
|
|
345
370
|
}
|
|
@@ -370,7 +395,7 @@ static void swiss_init(SwissHash *sh, size_t min_capacity) {
|
|
|
370
395
|
sh->growth_left = capacity * MAX_LOAD_NUM / MAX_LOAD_DEN;
|
|
371
396
|
|
|
372
397
|
sh->ctrl = (uint8_t *)malloc(capacity);
|
|
373
|
-
sh->slots = (Slot *)
|
|
398
|
+
sh->slots = (Slot *)malloc(capacity * sizeof(Slot));
|
|
374
399
|
|
|
375
400
|
if (!sh->ctrl || !sh->slots) {
|
|
376
401
|
free(sh->ctrl);
|
|
@@ -414,13 +439,16 @@ static VALUE *swiss_lookup(SwissHash *sh, VALUE key) {
|
|
|
414
439
|
|
|
415
440
|
#if defined(SWISS_USE_SSE2)
|
|
416
441
|
__m128i cv = ctrl_load(sh->ctrl + off);
|
|
442
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
417
443
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
418
444
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
419
445
|
#elif defined(SWISS_USE_NEON)
|
|
420
446
|
uint8x8_t cv = ctrl_load(sh->ctrl + off);
|
|
447
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
421
448
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
422
449
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
423
450
|
#else
|
|
451
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
424
452
|
uint32_t match = ctrl_match_h2_raw(sh->ctrl + off, h2);
|
|
425
453
|
uint32_t empty = ctrl_match_empty_raw(sh->ctrl + off);
|
|
426
454
|
#endif
|
|
@@ -474,7 +502,7 @@ static void swiss_compact(SwissHash *sh) {
|
|
|
474
502
|
Slot *old_slots = sh->slots;
|
|
475
503
|
|
|
476
504
|
sh->ctrl = (uint8_t *)malloc(cap);
|
|
477
|
-
sh->slots = (Slot *)
|
|
505
|
+
sh->slots = (Slot *)malloc(cap * sizeof(Slot));
|
|
478
506
|
|
|
479
507
|
if (!sh->ctrl || !sh->slots) {
|
|
480
508
|
free(sh->ctrl);
|
|
@@ -523,11 +551,13 @@ static VALUE swiss_insert(SwissHash *sh, VALUE key, VALUE value) {
|
|
|
523
551
|
|
|
524
552
|
#if defined(SWISS_USE_SSE2)
|
|
525
553
|
__m128i cv = ctrl_load(sh->ctrl + off);
|
|
554
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
526
555
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
527
556
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
528
557
|
uint32_t avail = ctrl_match_empty_or_deleted_vec(cv);
|
|
529
558
|
#elif defined(SWISS_USE_NEON)
|
|
530
559
|
uint8x8_t cv = ctrl_load(sh->ctrl + off);
|
|
560
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
531
561
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
532
562
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
533
563
|
uint32_t avail = ctrl_match_empty_or_deleted_vec(cv);
|
|
@@ -581,13 +611,16 @@ static VALUE swiss_delete(SwissHash *sh, VALUE key) {
|
|
|
581
611
|
|
|
582
612
|
#if defined(SWISS_USE_SSE2)
|
|
583
613
|
__m128i cv = ctrl_load(sh->ctrl + off);
|
|
614
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
584
615
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
585
616
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
586
617
|
#elif defined(SWISS_USE_NEON)
|
|
587
618
|
uint8x8_t cv = ctrl_load(sh->ctrl + off);
|
|
619
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
588
620
|
uint32_t match = ctrl_match_h2_vec(cv, h2);
|
|
589
621
|
uint32_t empty = ctrl_match_empty_vec(cv);
|
|
590
622
|
#else
|
|
623
|
+
SH_PREFETCH(&sh->slots[off]);
|
|
591
624
|
uint32_t match = ctrl_match_h2_raw(sh->ctrl + off, h2);
|
|
592
625
|
uint32_t empty = ctrl_match_empty_raw(sh->ctrl + off);
|
|
593
626
|
#endif
|
|
@@ -627,7 +660,7 @@ static void swiss_grow(SwissHash *sh) {
|
|
|
627
660
|
size_t new_cap = new_num_groups * GROUP_SIZE;
|
|
628
661
|
|
|
629
662
|
sh->ctrl = (uint8_t *)malloc(new_cap);
|
|
630
|
-
sh->slots = (Slot *)
|
|
663
|
+
sh->slots = (Slot *)malloc(new_cap * sizeof(Slot));
|
|
631
664
|
|
|
632
665
|
if (!sh->ctrl || !sh->slots) {
|
|
633
666
|
free(sh->ctrl);
|
|
@@ -718,8 +751,7 @@ static VALUE swiss_hash_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
718
751
|
}
|
|
719
752
|
|
|
720
753
|
static VALUE swiss_hash_aset(VALUE self, VALUE key, VALUE value) {
|
|
721
|
-
SwissHash *sh;
|
|
722
|
-
TypedData_Get_Struct(self, SwissHash, &swiss_hash_type, sh);
|
|
754
|
+
SwissHash *sh = (SwissHash *)RTYPEDDATA_DATA(self);
|
|
723
755
|
if (RB_UNLIKELY(!(FIXNUM_P(key) || SYMBOL_P(key)))) {
|
|
724
756
|
key = prepare_key(key);
|
|
725
757
|
}
|
|
@@ -727,15 +759,13 @@ static VALUE swiss_hash_aset(VALUE self, VALUE key, VALUE value) {
|
|
|
727
759
|
}
|
|
728
760
|
|
|
729
761
|
static VALUE swiss_hash_aref(VALUE self, VALUE key) {
|
|
730
|
-
SwissHash *sh;
|
|
731
|
-
TypedData_Get_Struct(self, SwissHash, &swiss_hash_type, sh);
|
|
762
|
+
SwissHash *sh = (SwissHash *)RTYPEDDATA_DATA(self);
|
|
732
763
|
VALUE *val = swiss_lookup(sh, key);
|
|
733
764
|
return val ? *val : Qnil;
|
|
734
765
|
}
|
|
735
766
|
|
|
736
767
|
static VALUE swiss_hash_delete(VALUE self, VALUE key) {
|
|
737
|
-
SwissHash *sh;
|
|
738
|
-
TypedData_Get_Struct(self, SwissHash, &swiss_hash_type, sh);
|
|
768
|
+
SwissHash *sh = (SwissHash *)RTYPEDDATA_DATA(self);
|
|
739
769
|
return swiss_delete(sh, key);
|
|
740
770
|
}
|
|
741
771
|
|
|
@@ -809,8 +839,7 @@ static VALUE swiss_hash_values(VALUE self) {
|
|
|
809
839
|
}
|
|
810
840
|
|
|
811
841
|
static VALUE swiss_hash_key_p(VALUE self, VALUE key) {
|
|
812
|
-
SwissHash *sh;
|
|
813
|
-
TypedData_Get_Struct(self, SwissHash, &swiss_hash_type, sh);
|
|
842
|
+
SwissHash *sh = (SwissHash *)RTYPEDDATA_DATA(self);
|
|
814
843
|
VALUE *val = swiss_lookup(sh, key);
|
|
815
844
|
return val ? Qtrue : Qfalse;
|
|
816
845
|
}
|
data/lib/swiss_hash/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: swiss_hash
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Roman Haidarov
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-04-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rake-compiler
|