simd_string_upcase 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 2561a3a78349ade559c6835afa0a89c0727204a8aaa68a9721791c1824d31c54
4
+ data.tar.gz: 496036b130fd6c0394095d7594d35e29f3986efa2e4d403aef69ff2fcbe4ba56
5
+ SHA512:
6
+ metadata.gz: 180089664068d8e33af317b91aca92d27974a2301afc3183fc0593263e01357dc14eb8f16bcbcf871a0af45eca16a9cd394ccd0d653605dcc1064a0ad1aad9fd
7
+ data.tar.gz: 5227eaee9a5605ea3bbe75fd66e323e148e98deaac90a80e8f35c475bcf8df1c195dcb78784bbeaa8142fc98b6bdc0ecbf5b7c1d425156f6d4cde37eb5c82b67
@@ -0,0 +1,42 @@
1
require 'mkmf'

# The extension is written with x86 SSE2/AVX/AVX2 intrinsics, so refuse to
# build on ARM64.
if RUBY_PLATFORM =~ /arm64|aarch64/
  raise "SIMD string upcase gem is not supported on ARM64 architecture yet."
end

have_header('emmintrin.h')
have_header('immintrin.h')

# Returns true when the compiler can build a tiny program that uses
# +statement+ (an intrinsic from +header+) once +flag+ is on the command line.
#
# Checking for predefined macros such as __AVX2__ is not enough: those macros
# are only defined when the flag is *already* enabled, so a default compiler
# would never get the flags it needs to compile the intrinsics at all. The
# extension picks the code path at run time, so the build only has to confirm
# that the compiler accepts the flag.
def simd_flag_usable?(flag, header, statement)
  source = "#include <#{header}>\n" \
           "int main(void) { #{statement} return 0; }\n"
  try_compile(source, flag)
end

# Force enable AVX and AVX2 on macOS for known supporting architectures
if RUBY_PLATFORM =~ /darwin/
  $CFLAGS << ' -mavx -mavx2'
  puts "macOS detected. Forcing AVX and AVX2 support."
else
  have_sse2 = simd_flag_usable?('-msse2', 'emmintrin.h',
                                '__m128i v = _mm_set1_epi8(1); (void)v;')
  have_avx  = simd_flag_usable?('-mavx', 'immintrin.h',
                                '__m256 v = _mm256_setzero_ps(); (void)v;')
  have_avx2 = simd_flag_usable?('-mavx2', 'immintrin.h',
                                '__m256i v = _mm256_sub_epi8(_mm256_set1_epi8(2), _mm256_set1_epi8(1)); (void)v;')

  if have_sse2
    $CFLAGS << ' -msse2'
    puts "SSE2 support detected and enabled."
  else
    puts "Warning: SSE2 not supported. The SSE2 version may not work correctly."
  end

  if have_avx
    $CFLAGS << ' -mavx'
    puts "AVX support detected and enabled."
  else
    puts "Warning: AVX not supported. The AVX1 version may not work correctly."
  end

  if have_avx2
    $CFLAGS << ' -mavx2'
    puts "AVX2 support detected and enabled."
  else
    puts "Warning: AVX2 not supported. The AVX2 version may not work correctly."
  end
end

create_makefile('simd_string_upcase/simd_string_upcase')
@@ -0,0 +1,186 @@
1
+ #include <ruby.h>
2
+ #include <emmintrin.h> // For SSE2 intrinsics
3
+ #include <immintrin.h> // For AVX and AVX2 intrinsics
4
+
5
+ #ifdef __APPLE__
6
+ #include <sys/types.h>
7
+ #include <sys/sysctl.h>
8
+ #else
9
+ #include <cpuid.h>
10
+ #endif
11
+
12
+ static int has_sse2 = 0, has_avx = 0, has_avx2 = 0;
13
+
14
+ #ifdef __APPLE__
15
+ // Function to detect CPU features on macOS
16
+ static void detect_cpu_features() {
17
+ size_t sse2_len = sizeof(has_sse2);
18
+ size_t avx_len = sizeof(has_avx);
19
+ size_t avx2_len = sizeof(has_avx2);
20
+
21
+ sysctlbyname("hw.optional.sse2", &has_sse2, &sse2_len, NULL, 0);
22
+ sysctlbyname("hw.optional.avx1_0", &has_avx, &avx_len, NULL, 0);
23
+ sysctlbyname("hw.optional.avx2_0", &has_avx2, &avx2_len, NULL, 0);
24
+ }
25
+ #else
26
+ // Function to detect CPU features on Linux
27
+ static void detect_cpu_features() {
28
+ unsigned int eax, ebx, ecx, edx;
29
+
30
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
31
+ has_sse2 = edx & (1 << 26);
32
+ has_avx = ecx & (1 << 28);
33
+ }
34
+
35
+ if (has_avx && __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
36
+ has_avx2 = ebx & (1 << 5);
37
+ }
38
+ }
39
+ #endif
40
+
41
+ static VALUE upcase_sse2(VALUE self, VALUE str) {
42
+ char *text = StringValuePtr(str);
43
+ long len = RSTRING_LEN(str);
44
+ VALUE result = rb_str_new(NULL, len);
45
+ char *res_text = StringValuePtr(result);
46
+
47
+ __m128i lower_a = _mm_set1_epi8('a');
48
+ __m128i lower_z = _mm_set1_epi8('z');
49
+ __m128i diff = _mm_set1_epi8('a' - 'A');
50
+
51
+ long i;
52
+ for (i = 0; i <= len - 16; i += 16) {
53
+ __m128i chunk = _mm_loadu_si128((__m128i*)(text + i));
54
+ __m128i mask_a = _mm_cmpgt_epi8(chunk, _mm_sub_epi8(lower_a, _mm_set1_epi8(1)));
55
+ __m128i mask_z = _mm_cmplt_epi8(chunk, _mm_add_epi8(lower_z, _mm_set1_epi8(1)));
56
+ __m128i mask = _mm_and_si128(mask_a, mask_z);
57
+ chunk = _mm_sub_epi8(chunk, _mm_and_si128(mask, diff));
58
+ _mm_storeu_si128((__m128i*)(res_text + i), chunk);
59
+ }
60
+
61
+ for (; i < len; i++) {
62
+ if (text[i] >= 'a' && text[i] <= 'z') {
63
+ res_text[i] = text[i] - ('a' - 'A');
64
+ } else {
65
+ res_text[i] = text[i];
66
+ }
67
+ }
68
+
69
+ return result;
70
+ }
71
+
72
+ static VALUE upcase_avx1(VALUE self, VALUE str) {
73
+ char *text = StringValuePtr(str);
74
+ long len = RSTRING_LEN(str);
75
+ VALUE result = rb_str_new(NULL, len);
76
+ char *res_text = StringValuePtr(result);
77
+
78
+ __m256i lower_a = _mm256_set1_epi8('a');
79
+ __m256i lower_z = _mm256_set1_epi8('z');
80
+ __m256i diff = _mm256_set1_epi8('a' - 'A');
81
+
82
+ long i;
83
+ for (i = 0; i <= len - 32; i += 32) {
84
+ __m256i chunk = _mm256_loadu_si256((__m256i*)(text + i));
85
+ __m256i mask_a = _mm256_cmpgt_epi8(chunk, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
86
+ __m256i mask_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk);
87
+ __m256i mask = _mm256_and_si256(mask_a, mask_z);
88
+ chunk = _mm256_sub_epi8(chunk, _mm256_and_si256(mask, diff));
89
+ _mm256_storeu_si256((__m256i*)(res_text + i), chunk);
90
+ }
91
+
92
+ for (; i < len; i++) {
93
+ if (text[i] >= 'a' && text[i] <= 'z') {
94
+ res_text[i] = text[i] - 32;
95
+ } else {
96
+ res_text[i] = text[i];
97
+ }
98
+ }
99
+
100
+ return result;
101
+ }
102
+
103
+ static VALUE upcase_avx2(VALUE self, VALUE str) {
104
+ char *text = StringValuePtr(str);
105
+ long len = RSTRING_LEN(str);
106
+ VALUE result = rb_str_new(NULL, len);
107
+ char *res_text = StringValuePtr(result);
108
+
109
+ __m256i lower_a = _mm256_set1_epi8('a');
110
+ __m256i lower_z = _mm256_set1_epi8('z');
111
+ __m256i diff = _mm256_set1_epi8('a' - 'A');
112
+
113
+ long i;
114
+ for (i = 0; i <= len - 64; i += 64) {
115
+ __m256i chunk1 = _mm256_loadu_si256((__m256i*)(text + i));
116
+ __m256i chunk2 = _mm256_loadu_si256((__m256i*)(text + i + 32));
117
+
118
+ __m256i mask1_a = _mm256_cmpgt_epi8(chunk1, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
119
+ __m256i mask1_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk1);
120
+ __m256i mask1 = _mm256_and_si256(mask1_a, mask1_z);
121
+
122
+ __m256i mask2_a = _mm256_cmpgt_epi8(chunk2, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
123
+ __m256i mask2_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk2);
124
+ __m256i mask2 = _mm256_and_si256(mask2_a, mask2_z);
125
+
126
+ chunk1 = _mm256_sub_epi8(chunk1, _mm256_and_si256(mask1, diff));
127
+ chunk2 = _mm256_sub_epi8(chunk2, _mm256_and_si256(mask2, diff));
128
+
129
+ _mm256_storeu_si256((__m256i*)(res_text + i), chunk1);
130
+ _mm256_storeu_si256((__m256i*)(res_text + i + 32), chunk2);
131
+ }
132
+
133
+ for (; i < len; i++) {
134
+ if (text[i] >= 'a' && text[i] <= 'z') {
135
+ res_text[i] = text[i] - 32;
136
+ } else {
137
+ res_text[i] = text[i];
138
+ }
139
+ }
140
+
141
+ return result;
142
+ }
143
+
144
+ static VALUE simd_upcase(VALUE self, VALUE str) {
145
+ // Check if the string is ASCII only
146
+ long len = RSTRING_LEN(str);
147
+ char *text = StringValuePtr(str);
148
+ for (long i = 0; i < len; i++) {
149
+ if ((unsigned char)text[i] > 127) {
150
+ // Fallback to default Ruby implementation for non-ASCII characters
151
+ return rb_funcall(str, rb_intern("upcase"), 0);
152
+ }
153
+ }
154
+
155
+ // Use SIMD optimized version for ASCII-only strings
156
+ if (has_avx2) {
157
+ return upcase_avx2(self, str);
158
+ } else if (has_avx) {
159
+ return upcase_avx1(self, str);
160
+ } else if (has_sse2) {
161
+ return upcase_sse2(self, str);
162
+ } else {
163
+ // Fallback to default Ruby implementation
164
+ return rb_funcall(str, rb_intern("upcase"), 0);
165
+ }
166
+ }
167
+
168
+ // Function to return the used instruction set
169
+ static VALUE get_instruction_set(VALUE self) {
170
+ if (has_avx2) {
171
+ return rb_str_new_cstr("AVX2");
172
+ } else if (has_avx) {
173
+ return rb_str_new_cstr("AVX");
174
+ } else if (has_sse2) {
175
+ return rb_str_new_cstr("SSE2");
176
+ } else {
177
+ return rb_str_new_cstr("DEFAULT");
178
+ }
179
+ }
180
+
181
+ void Init_simd_string_upcase(void) {
182
+ detect_cpu_features();
183
+ VALUE module = rb_define_module("SIMDStringUpcase");
184
+ rb_define_singleton_method(module, "upcase", simd_upcase, 1);
185
+ rb_define_singleton_method(module, "instruction_set", get_instruction_set, 0);
186
+ }
@@ -0,0 +1,9 @@
1
+ require 'simd_string_upcase/simd_string_upcase'
2
+
3
class String
  # Keep the built-in implementation reachable as #original_upcase. The guard
  # stops a second load of this file from aliasing the patched method onto
  # itself, which would make #original_upcase call the SIMD path again.
  alias_method :original_upcase, :upcase unless method_defined?(:original_upcase)

  # Returns an upcased copy of the receiver.
  #
  # Called without arguments it uses the SIMD extension. When case-mapping
  # options are given (for example :ascii, :turkic or :lithuanian) the call
  # is forwarded unchanged to the built-in implementation, so the method
  # keeps accepting every argument form the original accepts.
  def upcase(*options)
    return original_upcase(*options) unless options.empty?

    SIMDStringUpcase.upcase(self)
  end
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simd_string_upcase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ platform: ruby
6
+ authors:
7
+ - Marian Posaceanu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake-compiler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark-ips
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: A SIMD-optimized replacement for Ruby's String#upcase method for x86_64
42
+ architectures
43
+ email: contact@marianposaceanu.com
44
+ executables: []
45
+ extensions:
46
+ - ext/simd_string_upcase/extconf.rb
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ext/simd_string_upcase/extconf.rb
50
+ - ext/simd_string_upcase/simd_string_upcase.c
51
+ - lib/simd_string_upcase.rb
52
+ homepage: https://github.com/marianposaceanu/simd_string_upcase
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message: SIMD String#upcase installed successfully!
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 2.0.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements:
71
+ - x86_64 architecture
72
+ rubygems_version: 3.5.3
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: SIMD-optimized string upcase for Ruby
76
+ test_files: []