simd_string_upcase 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/simd_string_upcase/extconf.rb +42 -0
- data/ext/simd_string_upcase/simd_string_upcase.c +186 -0
- data/lib/simd_string_upcase.rb +9 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 2561a3a78349ade559c6835afa0a89c0727204a8aaa68a9721791c1824d31c54
|
4
|
+
data.tar.gz: 496036b130fd6c0394095d7594d35e29f3986efa2e4d403aef69ff2fcbe4ba56
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 180089664068d8e33af317b91aca92d27974a2301afc3183fc0593263e01357dc14eb8f16bcbcf871a0af45eca16a9cd394ccd0d653605dcc1064a0ad1aad9fd
|
7
|
+
data.tar.gz: 5227eaee9a5605ea3bbe75fd66e323e148e98deaac90a80e8f35c475bcf8df1c195dcb78784bbeaa8142fc98b6bdc0ecbf5b7c1d425156f6d4cde37eb5c82b67
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'mkmf'

# The extension relies on x86 SIMD intrinsics, so refuse to build on ARM64.
raise "SIMD string upcase gem is not supported on ARM64 architecture yet." if RUBY_PLATFORM =~ /arm64|aarch64/

# Probe for the intrinsic headers included by the C source.
%w[emmintrin.h immintrin.h].each { |header| have_header(header) }

if RUBY_PLATFORM =~ /darwin/
  # macOS: no probing, the AVX and AVX2 flags are always appended.
  $CFLAGS << ' -mavx -mavx2'
  puts "macOS detected. Forcing AVX and AVX2 support."
else
  # Other platforms: all three macro probes run first (array elements are
  # evaluated in order), then each result is reported and its flag appended.
  # Each row is [probe result, compiler flag, success message, warning message].
  probes = [
    [have_macro('__SSE2__', 'emmintrin.h'), ' -msse2',
     "SSE2 support detected and enabled.",
     "Warning: SSE2 not supported. The SSE2 version may not work correctly."],
    [have_macro('__AVX__', 'immintrin.h'), ' -mavx',
     "AVX support detected and enabled.",
     "Warning: AVX not supported. The AVX1 version may not work correctly."],
    [have_macro('__AVX2__', 'immintrin.h'), ' -mavx2',
     "AVX2 support detected and enabled.",
     "Warning: AVX2 not supported. The AVX2 version may not work correctly."]
  ]

  probes.each do |found, flag, enabled_message, warning_message|
    if found
      $CFLAGS << flag
      puts enabled_message
    else
      puts warning_message
    end
  end
end

create_makefile('simd_string_upcase/simd_string_upcase')
|
@@ -0,0 +1,186 @@
|
|
1
|
+
#include <ruby.h>
#include <ruby/encoding.h> // For rb_enc_copy and rb_enc_str_asciionly_p
#include <emmintrin.h> // For SSE2 intrinsics
#include <immintrin.h> // For AVX and AVX2 intrinsics
|
4
|
+
|
5
|
+
#ifdef __APPLE__
|
6
|
+
#include <sys/types.h>
|
7
|
+
#include <sys/sysctl.h>
|
8
|
+
#else
|
9
|
+
#include <cpuid.h>
|
10
|
+
#endif
|
11
|
+
|
12
|
+
static int has_sse2 = 0, has_avx = 0, has_avx2 = 0;
|
13
|
+
|
14
|
+
#ifdef __APPLE__
|
15
|
+
// Fill has_sse2 / has_avx / has_avx2 from the macOS "hw.optional.*" sysctl
// entries. The sysctlbyname() return value is not checked, so a flag is only
// changed if the call writes to it.
static void detect_cpu_features() {
    struct {
        const char *key;
        int *flag;
    } probes[] = {
        { "hw.optional.sse2",   &has_sse2 },
        { "hw.optional.avx1_0", &has_avx  },
        { "hw.optional.avx2_0", &has_avx2 },
    };

    for (size_t k = 0; k < sizeof(probes) / sizeof(probes[0]); k++) {
        // sysctlbyname() takes the buffer size in/out, so reset it per query.
        size_t flag_size = sizeof(*probes[k].flag);
        sysctlbyname(probes[k].key, probes[k].flag, &flag_size, NULL, 0);
    }
}
|
25
|
+
#else
|
26
|
+
// Function to detect CPU features on Linux
|
27
|
+
static void detect_cpu_features() {
|
28
|
+
unsigned int eax, ebx, ecx, edx;
|
29
|
+
|
30
|
+
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
|
31
|
+
has_sse2 = edx & (1 << 26);
|
32
|
+
has_avx = ecx & (1 << 28);
|
33
|
+
}
|
34
|
+
|
35
|
+
if (has_avx && __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
|
36
|
+
has_avx2 = ebx & (1 << 5);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
#endif
|
40
|
+
|
41
|
+
/*
 * Upcase the bytes 'a'..'z' of str using 128-bit SSE2 integer instructions.
 *
 * self - receiver (unused).
 * str  - source string, passed through StringValuePtr.
 *
 * Returns a new string of the same byte length with str's encoding; every
 * byte outside 'a'..'z' is copied unchanged.
 */
static VALUE upcase_sse2(VALUE self, VALUE str) {
    const char *text = StringValuePtr(str);
    long len = RSTRING_LEN(str);
    VALUE result = rb_str_new(NULL, len);
    char *res_text = RSTRING_PTR(result);
    long i;

    /* rb_str_new() produces a binary (ASCII-8BIT) string; keep the encoding
     * of the input so the result matches what String#upcase would return. */
    rb_enc_copy(result, str);

    const __m128i below_a = _mm_set1_epi8('a' - 1);
    const __m128i above_z = _mm_set1_epi8('z' + 1);
    const __m128i case_diff = _mm_set1_epi8('a' - 'A');

    for (i = 0; i + 16 <= len; i += 16) {
        __m128i chunk = _mm_loadu_si128((const __m128i *)(text + i));
        /* Signed compares: bytes >= 0x80 are negative and never selected. */
        __m128i is_lower = _mm_and_si128(_mm_cmpgt_epi8(chunk, below_a),
                                         _mm_cmplt_epi8(chunk, above_z));
        chunk = _mm_sub_epi8(chunk, _mm_and_si128(is_lower, case_diff));
        _mm_storeu_si128((__m128i *)(res_text + i), chunk);
    }

    /* Scalar tail for the last (len % 16) bytes. */
    for (; i < len; i++) {
        char c = text[i];
        res_text[i] = (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : c;
    }

    return result;
}
|
71
|
+
|
72
|
+
/*
 * Upcase the bytes 'a'..'z' of str on CPUs that have AVX but not AVX2.
 *
 * First-generation AVX has no 256-bit integer compare/subtract/and; those
 * (_mm256_cmpgt_epi8, _mm256_sub_epi8, _mm256_and_si256) are AVX2 and raise
 * an illegal-instruction fault on AVX-only CPUs. This path therefore handles
 * each 32-byte step as two 128-bit halves using integer operations that are
 * available without AVX2.
 *
 * NOTE(review): if the whole file is compiled with -mavx2 (extconf.rb does so
 * on macOS) the compiler is still free to emit AVX2 instructions here; that
 * has to be addressed in the build flags - confirm.
 *
 * self - receiver (unused).
 * str  - source string, passed through StringValuePtr.
 *
 * Returns a new string of the same byte length with str's encoding; every
 * byte outside 'a'..'z' is copied unchanged.
 */
static VALUE upcase_avx1(VALUE self, VALUE str) {
    const char *text = StringValuePtr(str);
    long len = RSTRING_LEN(str);
    VALUE result = rb_str_new(NULL, len);
    char *res_text = RSTRING_PTR(result);
    long i;

    /* rb_str_new() produces a binary (ASCII-8BIT) string; keep the encoding
     * of the input so the result matches what String#upcase would return. */
    rb_enc_copy(result, str);

    const __m128i below_a = _mm_set1_epi8('a' - 1);
    const __m128i above_z = _mm_set1_epi8('z' + 1);
    const __m128i case_diff = _mm_set1_epi8('a' - 'A');

    for (i = 0; i + 32 <= len; i += 32) {
        for (long half = 0; half < 32; half += 16) {
            __m128i chunk = _mm_loadu_si128((const __m128i *)(text + i + half));
            /* Signed compares: bytes >= 0x80 are negative and never selected. */
            __m128i is_lower = _mm_and_si128(_mm_cmpgt_epi8(chunk, below_a),
                                             _mm_cmpgt_epi8(above_z, chunk));
            chunk = _mm_sub_epi8(chunk, _mm_and_si128(is_lower, case_diff));
            _mm_storeu_si128((__m128i *)(res_text + i + half), chunk);
        }
    }

    /* Scalar tail for the last (len % 32) bytes. */
    for (; i < len; i++) {
        char c = text[i];
        res_text[i] = (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : c;
    }

    return result;
}
|
102
|
+
|
103
|
+
/*
 * Upcase the bytes 'a'..'z' of str using 256-bit AVX2 integer instructions,
 * processing 64 bytes (two 32-byte vectors) per loop iteration.
 *
 * self - receiver (unused).
 * str  - source string, passed through StringValuePtr.
 *
 * Returns a new string of the same byte length with str's encoding; every
 * byte outside 'a'..'z' is copied unchanged.
 */
static VALUE upcase_avx2(VALUE self, VALUE str) {
    const char *text = StringValuePtr(str);
    long len = RSTRING_LEN(str);
    VALUE result = rb_str_new(NULL, len);
    char *res_text = RSTRING_PTR(result);
    long i;

    /* rb_str_new() produces a binary (ASCII-8BIT) string; keep the encoding
     * of the input so the result matches what String#upcase would return. */
    rb_enc_copy(result, str);

    const __m256i below_a = _mm256_set1_epi8('a' - 1);
    const __m256i above_z = _mm256_set1_epi8('z' + 1);
    const __m256i case_diff = _mm256_set1_epi8('a' - 'A');

    for (i = 0; i + 64 <= len; i += 64) {
        __m256i chunk1 = _mm256_loadu_si256((const __m256i *)(text + i));
        __m256i chunk2 = _mm256_loadu_si256((const __m256i *)(text + i + 32));

        /* Signed compares: bytes >= 0x80 are negative and never selected. */
        __m256i is_lower1 = _mm256_and_si256(_mm256_cmpgt_epi8(chunk1, below_a),
                                             _mm256_cmpgt_epi8(above_z, chunk1));
        __m256i is_lower2 = _mm256_and_si256(_mm256_cmpgt_epi8(chunk2, below_a),
                                             _mm256_cmpgt_epi8(above_z, chunk2));

        chunk1 = _mm256_sub_epi8(chunk1, _mm256_and_si256(is_lower1, case_diff));
        chunk2 = _mm256_sub_epi8(chunk2, _mm256_and_si256(is_lower2, case_diff));

        _mm256_storeu_si256((__m256i *)(res_text + i), chunk1);
        _mm256_storeu_si256((__m256i *)(res_text + i + 32), chunk2);
    }

    /* Scalar tail for the last (len % 64) bytes. */
    for (; i < len; i++) {
        char c = text[i];
        res_text[i] = (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : c;
    }

    return result;
}
|
143
|
+
|
144
|
+
/*
 * SIMDStringUpcase.upcase(str): upcase str, using the widest SIMD path the
 * CPU supports for ASCII-only input.
 *
 * self - receiver (the module).
 * str  - the string to upcase; validated/converted with StringValue, which
 *        raises for arguments that are not strings.
 *
 * Returns the upcased string. Input that is not ASCII-only, or a CPU without
 * any detected SIMD support, falls back to calling str.upcase.
 */
static VALUE simd_upcase(VALUE self, VALUE str) {
    /* Type-check before any RSTRING_* access; reading the length of a
     * non-String VALUE is undefined behaviour. */
    StringValue(str);

    /* The byte-wise SIMD paths are only valid for ASCII-only content in an
     * ASCII-compatible encoding; everything else goes to Ruby's upcase. */
    if (!rb_enc_str_asciionly_p(str)) {
        return rb_funcall(str, rb_intern("upcase"), 0);
    }

    if (has_avx2) {
        return upcase_avx2(self, str);
    }
    if (has_avx) {
        return upcase_avx1(self, str);
    }
    if (has_sse2) {
        return upcase_sse2(self, str);
    }

    /* No usable SIMD instruction set detected. */
    return rb_funcall(str, rb_intern("upcase"), 0);
}
|
167
|
+
|
168
|
+
// Function to return the used instruction set
|
169
|
+
static VALUE get_instruction_set(VALUE self) {
|
170
|
+
if (has_avx2) {
|
171
|
+
return rb_str_new_cstr("AVX2");
|
172
|
+
} else if (has_avx) {
|
173
|
+
return rb_str_new_cstr("AVX");
|
174
|
+
} else if (has_sse2) {
|
175
|
+
return rb_str_new_cstr("SSE2");
|
176
|
+
} else {
|
177
|
+
return rb_str_new_cstr("DEFAULT");
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
void Init_simd_string_upcase(void) {
|
182
|
+
detect_cpu_features();
|
183
|
+
VALUE module = rb_define_module("SIMDStringUpcase");
|
184
|
+
rb_define_singleton_method(module, "upcase", simd_upcase, 1);
|
185
|
+
rb_define_singleton_method(module, "instruction_set", get_instruction_set, 0);
|
186
|
+
}
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simd_string_upcase
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Marian Posaceanu
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-06-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake-compiler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.2'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: benchmark-ips
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: A SIMD-optimized replacement for Ruby's String#upcase method for x86_64
|
42
|
+
architectures
|
43
|
+
email: contact@marianposaceanu.com
|
44
|
+
executables: []
|
45
|
+
extensions:
|
46
|
+
- ext/simd_string_upcase/extconf.rb
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- ext/simd_string_upcase/extconf.rb
|
50
|
+
- ext/simd_string_upcase/simd_string_upcase.c
|
51
|
+
- lib/simd_string_upcase.rb
|
52
|
+
homepage: https://github.com/marianposaceanu/simd_string_upcase
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message: SIMD String#upcase installed successfully!
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 2.0.0
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements:
|
71
|
+
- x86_64 architecture
|
72
|
+
rubygems_version: 3.5.3
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: SIMD-optimized string upcase for Ruby
|
76
|
+
test_files: []
|