simd_string_upcase 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/simd_string_upcase/extconf.rb +42 -0
- data/ext/simd_string_upcase/simd_string_upcase.c +186 -0
- data/lib/simd_string_upcase.rb +9 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 2561a3a78349ade559c6835afa0a89c0727204a8aaa68a9721791c1824d31c54
|
4
|
+
data.tar.gz: 496036b130fd6c0394095d7594d35e29f3986efa2e4d403aef69ff2fcbe4ba56
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 180089664068d8e33af317b91aca92d27974a2301afc3183fc0593263e01357dc14eb8f16bcbcf871a0af45eca16a9cd394ccd0d653605dcc1064a0ad1aad9fd
|
7
|
+
data.tar.gz: 5227eaee9a5605ea3bbe75fd66e323e148e98deaac90a80e8f35c475bcf8df1c195dcb78784bbeaa8142fc98b6bdc0ecbf5b7c1d425156f6d4cde37eb5c82b67
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'mkmf'

# The extension is built from x86 SIMD intrinsics, so ARM64 hosts are rejected up front.
raise "SIMD string upcase gem is not supported on ARM64 architecture yet." if RUBY_PLATFORM =~ /arm64|aarch64/

# Probe for the intrinsic headers included by the C source.
%w[emmintrin.h immintrin.h].each { |header| have_header(header) }

if RUBY_PLATFORM =~ /darwin/
  # On macOS the AVX/AVX2 compiler flags are always added, without probing.
  $CFLAGS << ' -mavx -mavx2'
  puts "macOS detected. Forcing AVX and AVX2 support."
else
  # One entry per SIMD level: the macro to probe, the header to probe it in,
  # the compiler flag to add when found, and the two status messages.
  simd_levels = [
    {
      macro: '__SSE2__', header: 'emmintrin.h', flag: ' -msse2',
      enabled: "SSE2 support detected and enabled.",
      missing: "Warning: SSE2 not supported. The SSE2 version may not work correctly."
    },
    {
      macro: '__AVX__', header: 'immintrin.h', flag: ' -mavx',
      enabled: "AVX support detected and enabled.",
      missing: "Warning: AVX not supported. The AVX1 version may not work correctly."
    },
    {
      macro: '__AVX2__', header: 'immintrin.h', flag: ' -mavx2',
      enabled: "AVX2 support detected and enabled.",
      missing: "Warning: AVX2 not supported. The AVX2 version may not work correctly."
    }
  ]

  # Run every probe before touching $CFLAGS so that all checks see the same flags.
  detected = simd_levels.map { |level| have_macro(level[:macro], level[:header]) }

  simd_levels.each_with_index do |level, index|
    if detected[index]
      $CFLAGS << level[:flag]
      puts level[:enabled]
    else
      puts level[:missing]
    end
  end
end

create_makefile('simd_string_upcase/simd_string_upcase')
|
@@ -0,0 +1,186 @@
|
|
1
|
+
#include <ruby.h>
#include <ruby/encoding.h> // For rb_enc_str_asciionly_p and rb_enc_copy
#include <emmintrin.h> // For SSE2 intrinsics
#include <immintrin.h> // For AVX and AVX2 intrinsics
|
4
|
+
|
5
|
+
#ifdef __APPLE__
|
6
|
+
#include <sys/types.h>
|
7
|
+
#include <sys/sysctl.h>
|
8
|
+
#else
|
9
|
+
#include <cpuid.h>
|
10
|
+
#endif
|
11
|
+
|
12
|
+
// Runtime CPU capability flags. They start at zero and are filled in by
// detect_cpu_features(); a non-zero value selects the matching SIMD kernel.
static int has_sse2 = 0;
static int has_avx = 0;
static int has_avx2 = 0;
|
13
|
+
|
14
|
+
#ifdef __APPLE__
|
15
|
+
// Function to detect CPU features on macOS
|
16
|
+
static void detect_cpu_features() {
|
17
|
+
size_t sse2_len = sizeof(has_sse2);
|
18
|
+
size_t avx_len = sizeof(has_avx);
|
19
|
+
size_t avx2_len = sizeof(has_avx2);
|
20
|
+
|
21
|
+
sysctlbyname("hw.optional.sse2", &has_sse2, &sse2_len, NULL, 0);
|
22
|
+
sysctlbyname("hw.optional.avx1_0", &has_avx, &avx_len, NULL, 0);
|
23
|
+
sysctlbyname("hw.optional.avx2_0", &has_avx2, &avx2_len, NULL, 0);
|
24
|
+
}
|
25
|
+
#else
|
26
|
+
// Function to detect CPU features on Linux
|
27
|
+
static void detect_cpu_features() {
|
28
|
+
unsigned int eax, ebx, ecx, edx;
|
29
|
+
|
30
|
+
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
|
31
|
+
has_sse2 = edx & (1 << 26);
|
32
|
+
has_avx = ecx & (1 << 28);
|
33
|
+
}
|
34
|
+
|
35
|
+
if (has_avx && __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
|
36
|
+
has_avx2 = ebx & (1 << 5);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
#endif
|
40
|
+
|
41
|
+
// Returns a new Ruby string holding `str` with the bytes 'a'..'z' replaced by
// 'A'..'Z'. Full 16-byte groups are handled with SSE2; the remaining bytes
// are handled one at a time. The byte comparisons are signed, so bytes with
// the high bit set are never treated as lowercase letters.
static VALUE upcase_sse2(VALUE self, VALUE str) {
    const char *src = StringValuePtr(str);
    long n = RSTRING_LEN(str);
    VALUE out = rb_str_new(NULL, n);
    char *dst = StringValuePtr(out);

    // A byte is lowercase when ('a' - 1) < byte < ('z' + 1).
    const __m128i below_a = _mm_sub_epi8(_mm_set1_epi8('a'), _mm_set1_epi8(1));
    const __m128i above_z = _mm_add_epi8(_mm_set1_epi8('z'), _mm_set1_epi8(1));
    const __m128i case_gap = _mm_set1_epi8('a' - 'A');

    long pos = 0;
    while (pos <= n - 16) {
        __m128i bytes = _mm_loadu_si128((const __m128i *)(src + pos));
        __m128i is_lower = _mm_and_si128(_mm_cmpgt_epi8(bytes, below_a),
                                         _mm_cmplt_epi8(bytes, above_z));
        // Subtract the case gap only in the lanes that hold a lowercase letter.
        __m128i upper = _mm_sub_epi8(bytes, _mm_and_si128(is_lower, case_gap));
        _mm_storeu_si128((__m128i *)(dst + pos), upper);
        pos += 16;
    }

    // Scalar tail for the last (n % 16) bytes.
    while (pos < n) {
        char c = src[pos];
        dst[pos] = (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : c;
        pos++;
    }

    return out;
}
|
71
|
+
|
72
|
+
// Upcases the lowercase ASCII letters in one 16-byte vector.
// Only 128-bit integer operations are used. The comparisons are signed, so
// bytes with the high bit set never match the 'a'..'z' range.
static __m128i avx1_upcase_lane(__m128i bytes) {
    const __m128i below_a = _mm_set1_epi8('a' - 1);
    const __m128i above_z = _mm_set1_epi8('z' + 1);
    const __m128i case_gap = _mm_set1_epi8('a' - 'A');

    __m128i is_lower = _mm_and_si128(_mm_cmpgt_epi8(bytes, below_a),
                                     _mm_cmpgt_epi8(above_z, bytes));
    return _mm_sub_epi8(bytes, _mm_and_si128(is_lower, case_gap));
}

// Kernel selected when the CPU reports AVX but not AVX2.
//
// Returns a new Ruby string holding `str` with 'a'..'z' replaced by 'A'..'Z'.
// 256-bit integer compare/subtract/and instructions only exist in AVX2, so
// executing them on an AVX-only CPU raises an illegal-instruction fault.
// This tier therefore processes 32 bytes per iteration as two 128-bit lanes.
static VALUE upcase_avx1(VALUE self, VALUE str) {
    const char *text = StringValuePtr(str);
    long len = RSTRING_LEN(str);
    VALUE result = rb_str_new(NULL, len);
    char *res_text = StringValuePtr(result);

    long i;
    for (i = 0; i <= len - 32; i += 32) {
        __m128i lo = _mm_loadu_si128((const __m128i *)(text + i));
        __m128i hi = _mm_loadu_si128((const __m128i *)(text + i + 16));

        _mm_storeu_si128((__m128i *)(res_text + i), avx1_upcase_lane(lo));
        _mm_storeu_si128((__m128i *)(res_text + i + 16), avx1_upcase_lane(hi));
    }

    // Scalar tail for the last (len % 32) bytes.
    for (; i < len; i++) {
        if (text[i] >= 'a' && text[i] <= 'z') {
            res_text[i] = text[i] - ('a' - 'A');
        } else {
            res_text[i] = text[i];
        }
    }

    return result;
}
|
102
|
+
|
103
|
+
// Kernel selected when the CPU reports AVX2.
//
// Returns a new Ruby string holding `str` with 'a'..'z' replaced by 'A'..'Z'.
// 64 bytes are processed per iteration as two 256-bit vectors; the remaining
// bytes are handled one at a time. The comparisons are signed, so bytes with
// the high bit set never match the 'a'..'z' range.
//
// The target attribute lets this one function use AVX2 instructions even when
// the rest of the file is compiled without -mavx2, so the build does not
// depend on that flag being passed globally.
__attribute__((target("avx2")))
static VALUE upcase_avx2(VALUE self, VALUE str) {
    const char *text = StringValuePtr(str);
    long len = RSTRING_LEN(str);
    VALUE result = rb_str_new(NULL, len);
    char *res_text = StringValuePtr(result);

    // Loop-invariant vectors: a byte is lowercase when below_a < byte < above_z.
    const __m256i below_a = _mm256_set1_epi8('a' - 1);
    const __m256i above_z = _mm256_set1_epi8('z' + 1);
    const __m256i diff = _mm256_set1_epi8('a' - 'A');

    long i;
    for (i = 0; i <= len - 64; i += 64) {
        for (int half = 0; half < 64; half += 32) {
            __m256i chunk = _mm256_loadu_si256((const __m256i *)(text + i + half));
            __m256i mask = _mm256_and_si256(_mm256_cmpgt_epi8(chunk, below_a),
                                            _mm256_cmpgt_epi8(above_z, chunk));
            // Subtract the case gap only in lanes that hold a lowercase letter.
            chunk = _mm256_sub_epi8(chunk, _mm256_and_si256(mask, diff));
            _mm256_storeu_si256((__m256i *)(res_text + i + half), chunk);
        }
    }

    // Scalar tail for the last (len % 64) bytes.
    for (; i < len; i++) {
        if (text[i] >= 'a' && text[i] <= 'z') {
            res_text[i] = text[i] - 32;
        } else {
            res_text[i] = text[i];
        }
    }

    return result;
}
|
143
|
+
|
144
|
+
// SIMDStringUpcase.upcase(str): returns an upcased copy of `str`.
//
// `str` is first coerced to a String (raising TypeError when that is not
// possible). ASCII-only strings in an ASCII-compatible encoding go through the
// best SIMD kernel the CPU supports; every other string, and any CPU without
// SSE2, is delegated to Ruby's own String#upcase.
// The returned string carries the same encoding as `str`.
static VALUE simd_upcase(VALUE self, VALUE str) {
    VALUE result;

    // Coerce before reading any string internals: RSTRING_LEN on a
    // non-String VALUE is undefined behaviour.
    StringValue(str);

    // rb_enc_str_asciionly_p is false for strings with bytes above 127 and
    // for ASCII-incompatible encodings (e.g. UTF-16), where a byte-wise
    // upcase would corrupt characters.
    if (!rb_enc_str_asciionly_p(str)) {
        // Fallback to default Ruby implementation for non-ASCII characters
        return rb_funcall(str, rb_intern("upcase"), 0);
    }

    // Use SIMD optimized version for ASCII-only strings
    if (has_avx2) {
        result = upcase_avx2(self, str);
    } else if (has_avx) {
        result = upcase_avx1(self, str);
    } else if (has_sse2) {
        result = upcase_sse2(self, str);
    } else {
        // Fallback to default Ruby implementation
        return rb_funcall(str, rb_intern("upcase"), 0);
    }

    // The kernels build their result with rb_str_new, which yields a binary
    // (ASCII-8BIT) string; restore the encoding of the input.
    rb_enc_copy(result, str);
    return result;
}
|
167
|
+
|
168
|
+
// Function to return the used instruction set
|
169
|
+
static VALUE get_instruction_set(VALUE self) {
|
170
|
+
if (has_avx2) {
|
171
|
+
return rb_str_new_cstr("AVX2");
|
172
|
+
} else if (has_avx) {
|
173
|
+
return rb_str_new_cstr("AVX");
|
174
|
+
} else if (has_sse2) {
|
175
|
+
return rb_str_new_cstr("SSE2");
|
176
|
+
} else {
|
177
|
+
return rb_str_new_cstr("DEFAULT");
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
void Init_simd_string_upcase(void) {
|
182
|
+
detect_cpu_features();
|
183
|
+
VALUE module = rb_define_module("SIMDStringUpcase");
|
184
|
+
rb_define_singleton_method(module, "upcase", simd_upcase, 1);
|
185
|
+
rb_define_singleton_method(module, "instruction_set", get_instruction_set, 0);
|
186
|
+
}
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simd_string_upcase
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Marian Posaceanu
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-06-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake-compiler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: benchmark-ips
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: A SIMD-optimized replacement for Ruby's String#upcase method for x86_64
|
42
|
+
architectures
|
43
|
+
email: contact@marianposaceanu.com
|
44
|
+
executables: []
|
45
|
+
extensions:
|
46
|
+
- ext/simd_string_upcase/extconf.rb
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ext/simd_string_upcase/extconf.rb
|
50
|
+
- ext/simd_string_upcase/simd_string_upcase.c
|
51
|
+
- lib/simd_string_upcase.rb
|
52
|
+
homepage: https://github.com/marianposaceanu/simd_string_upcase
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message: SIMD String#upcase installed successfully!
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 2.0.0
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements:
|
71
|
+
- x86_64 architecture
|
72
|
+
rubygems_version: 3.5.3
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: SIMD-optimized string upcase for Ruby
|
76
|
+
test_files: []
|