simd_string_upcase 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 2561a3a78349ade559c6835afa0a89c0727204a8aaa68a9721791c1824d31c54
4
+ data.tar.gz: 496036b130fd6c0394095d7594d35e29f3986efa2e4d403aef69ff2fcbe4ba56
5
+ SHA512:
6
+ metadata.gz: 180089664068d8e33af317b91aca92d27974a2301afc3183fc0593263e01357dc14eb8f16bcbcf871a0af45eca16a9cd394ccd0d653605dcc1064a0ad1aad9fd
7
+ data.tar.gz: 5227eaee9a5605ea3bbe75fd66e323e148e98deaac90a80e8f35c475bcf8df1c195dcb78784bbeaa8142fc98b6bdc0ecbf5b7c1d425156f6d4cde37eb5c82b67
@@ -0,0 +1,42 @@
1
require 'mkmf'

# Check for ARM64 architecture: the extension is written with x86 SSE/AVX
# intrinsics only.
if RUBY_PLATFORM =~ /arm64|aarch64/
  raise "SIMD string upcase gem is not supported on ARM64 architecture yet."
end

have_header('emmintrin.h')
have_header('immintrin.h')

# Force enable AVX and AVX2 on macOS for known supporting architectures
if RUBY_PLATFORM =~ /darwin/
  $CFLAGS << ' -mavx -mavx2'
  puts "macOS detected. Forcing AVX and AVX2 support."
else
  # Each entry: compiler flag, name used in the success message, name of the
  # code path used in the warning message.
  simd_flags = [
    ['-msse2', 'SSE2', 'SSE2'],
    ['-mavx',  'AVX',  'AVX1'],
    ['-mavx2', 'AVX2', 'AVX2']
  ]

  simd_flags.each do |flag, name, variant|
    # Ask whether the *compiler* accepts the flag. Testing for a predefined
    # __AVX__/__AVX2__ macro instead only succeeds when the flag is already
    # active, so on a default toolchain the flag would never be added and the
    # 256-bit intrinsics in the C source would fail to compile. Which code
    # path actually runs is decided at load time by the extension itself.
    if try_cflags(flag)
      $CFLAGS << " #{flag}"
      puts "#{name} support detected and enabled."
    else
      puts "Warning: #{name} not supported. The #{variant} version may not work correctly."
    end
  end
end

create_makefile('simd_string_upcase/simd_string_upcase')
@@ -0,0 +1,186 @@
1
#include <ruby.h>
#include <ruby/encoding.h> // For rb_enc_copy
#include <emmintrin.h> // For SSE2 intrinsics
#include <immintrin.h> // For AVX and AVX2 intrinsics
4
+
5
+ #ifdef __APPLE__
6
+ #include <sys/types.h>
7
+ #include <sys/sysctl.h>
8
+ #else
9
+ #include <cpuid.h>
10
+ #endif
11
+
12
+ static int has_sse2 = 0, has_avx = 0, has_avx2 = 0;
13
+
14
+ #ifdef __APPLE__
15
+ // Function to detect CPU features on macOS
16
+ static void detect_cpu_features() {
17
+ size_t sse2_len = sizeof(has_sse2);
18
+ size_t avx_len = sizeof(has_avx);
19
+ size_t avx2_len = sizeof(has_avx2);
20
+
21
+ sysctlbyname("hw.optional.sse2", &has_sse2, &sse2_len, NULL, 0);
22
+ sysctlbyname("hw.optional.avx1_0", &has_avx, &avx_len, NULL, 0);
23
+ sysctlbyname("hw.optional.avx2_0", &has_avx2, &avx2_len, NULL, 0);
24
+ }
25
+ #else
26
+ // Function to detect CPU features on Linux
27
+ static void detect_cpu_features() {
28
+ unsigned int eax, ebx, ecx, edx;
29
+
30
+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
31
+ has_sse2 = edx & (1 << 26);
32
+ has_avx = ecx & (1 << 28);
33
+ }
34
+
35
+ if (has_avx && __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
36
+ has_avx2 = ebx & (1 << 5);
37
+ }
38
+ }
39
+ #endif
40
+
41
+ static VALUE upcase_sse2(VALUE self, VALUE str) {
42
+ char *text = StringValuePtr(str);
43
+ long len = RSTRING_LEN(str);
44
+ VALUE result = rb_str_new(NULL, len);
45
+ char *res_text = StringValuePtr(result);
46
+
47
+ __m128i lower_a = _mm_set1_epi8('a');
48
+ __m128i lower_z = _mm_set1_epi8('z');
49
+ __m128i diff = _mm_set1_epi8('a' - 'A');
50
+
51
+ long i;
52
+ for (i = 0; i <= len - 16; i += 16) {
53
+ __m128i chunk = _mm_loadu_si128((__m128i*)(text + i));
54
+ __m128i mask_a = _mm_cmpgt_epi8(chunk, _mm_sub_epi8(lower_a, _mm_set1_epi8(1)));
55
+ __m128i mask_z = _mm_cmplt_epi8(chunk, _mm_add_epi8(lower_z, _mm_set1_epi8(1)));
56
+ __m128i mask = _mm_and_si128(mask_a, mask_z);
57
+ chunk = _mm_sub_epi8(chunk, _mm_and_si128(mask, diff));
58
+ _mm_storeu_si128((__m128i*)(res_text + i), chunk);
59
+ }
60
+
61
+ for (; i < len; i++) {
62
+ if (text[i] >= 'a' && text[i] <= 'z') {
63
+ res_text[i] = text[i] - ('a' - 'A');
64
+ } else {
65
+ res_text[i] = text[i];
66
+ }
67
+ }
68
+
69
+ return result;
70
+ }
71
+
72
+ static VALUE upcase_avx1(VALUE self, VALUE str) {
73
+ char *text = StringValuePtr(str);
74
+ long len = RSTRING_LEN(str);
75
+ VALUE result = rb_str_new(NULL, len);
76
+ char *res_text = StringValuePtr(result);
77
+
78
+ __m256i lower_a = _mm256_set1_epi8('a');
79
+ __m256i lower_z = _mm256_set1_epi8('z');
80
+ __m256i diff = _mm256_set1_epi8('a' - 'A');
81
+
82
+ long i;
83
+ for (i = 0; i <= len - 32; i += 32) {
84
+ __m256i chunk = _mm256_loadu_si256((__m256i*)(text + i));
85
+ __m256i mask_a = _mm256_cmpgt_epi8(chunk, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
86
+ __m256i mask_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk);
87
+ __m256i mask = _mm256_and_si256(mask_a, mask_z);
88
+ chunk = _mm256_sub_epi8(chunk, _mm256_and_si256(mask, diff));
89
+ _mm256_storeu_si256((__m256i*)(res_text + i), chunk);
90
+ }
91
+
92
+ for (; i < len; i++) {
93
+ if (text[i] >= 'a' && text[i] <= 'z') {
94
+ res_text[i] = text[i] - 32;
95
+ } else {
96
+ res_text[i] = text[i];
97
+ }
98
+ }
99
+
100
+ return result;
101
+ }
102
+
103
+ static VALUE upcase_avx2(VALUE self, VALUE str) {
104
+ char *text = StringValuePtr(str);
105
+ long len = RSTRING_LEN(str);
106
+ VALUE result = rb_str_new(NULL, len);
107
+ char *res_text = StringValuePtr(result);
108
+
109
+ __m256i lower_a = _mm256_set1_epi8('a');
110
+ __m256i lower_z = _mm256_set1_epi8('z');
111
+ __m256i diff = _mm256_set1_epi8('a' - 'A');
112
+
113
+ long i;
114
+ for (i = 0; i <= len - 64; i += 64) {
115
+ __m256i chunk1 = _mm256_loadu_si256((__m256i*)(text + i));
116
+ __m256i chunk2 = _mm256_loadu_si256((__m256i*)(text + i + 32));
117
+
118
+ __m256i mask1_a = _mm256_cmpgt_epi8(chunk1, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
119
+ __m256i mask1_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk1);
120
+ __m256i mask1 = _mm256_and_si256(mask1_a, mask1_z);
121
+
122
+ __m256i mask2_a = _mm256_cmpgt_epi8(chunk2, _mm256_sub_epi8(lower_a, _mm256_set1_epi8(1)));
123
+ __m256i mask2_z = _mm256_cmpgt_epi8(_mm256_set1_epi8('z' + 1), chunk2);
124
+ __m256i mask2 = _mm256_and_si256(mask2_a, mask2_z);
125
+
126
+ chunk1 = _mm256_sub_epi8(chunk1, _mm256_and_si256(mask1, diff));
127
+ chunk2 = _mm256_sub_epi8(chunk2, _mm256_and_si256(mask2, diff));
128
+
129
+ _mm256_storeu_si256((__m256i*)(res_text + i), chunk1);
130
+ _mm256_storeu_si256((__m256i*)(res_text + i + 32), chunk2);
131
+ }
132
+
133
+ for (; i < len; i++) {
134
+ if (text[i] >= 'a' && text[i] <= 'z') {
135
+ res_text[i] = text[i] - 32;
136
+ } else {
137
+ res_text[i] = text[i];
138
+ }
139
+ }
140
+
141
+ return result;
142
+ }
143
+
144
+ static VALUE simd_upcase(VALUE self, VALUE str) {
145
+ // Check if the string is ASCII only
146
+ long len = RSTRING_LEN(str);
147
+ char *text = StringValuePtr(str);
148
+ for (long i = 0; i < len; i++) {
149
+ if ((unsigned char)text[i] > 127) {
150
+ // Fallback to default Ruby implementation for non-ASCII characters
151
+ return rb_funcall(str, rb_intern("upcase"), 0);
152
+ }
153
+ }
154
+
155
+ // Use SIMD optimized version for ASCII-only strings
156
+ if (has_avx2) {
157
+ return upcase_avx2(self, str);
158
+ } else if (has_avx) {
159
+ return upcase_avx1(self, str);
160
+ } else if (has_sse2) {
161
+ return upcase_sse2(self, str);
162
+ } else {
163
+ // Fallback to default Ruby implementation
164
+ return rb_funcall(str, rb_intern("upcase"), 0);
165
+ }
166
+ }
167
+
168
+ // Function to return the used instruction set
169
+ static VALUE get_instruction_set(VALUE self) {
170
+ if (has_avx2) {
171
+ return rb_str_new_cstr("AVX2");
172
+ } else if (has_avx) {
173
+ return rb_str_new_cstr("AVX");
174
+ } else if (has_sse2) {
175
+ return rb_str_new_cstr("SSE2");
176
+ } else {
177
+ return rb_str_new_cstr("DEFAULT");
178
+ }
179
+ }
180
+
181
+ void Init_simd_string_upcase(void) {
182
+ detect_cpu_features();
183
+ VALUE module = rb_define_module("SIMDStringUpcase");
184
+ rb_define_singleton_method(module, "upcase", simd_upcase, 1);
185
+ rb_define_singleton_method(module, "instruction_set", get_instruction_set, 0);
186
+ }
@@ -0,0 +1,9 @@
1
+ require 'simd_string_upcase/simd_string_upcase'
2
+
3
class String
  # Keep the built-in implementation reachable. The guard stops a second
  # load of this file from aliasing the override onto itself.
  alias_method :original_upcase, :upcase unless method_defined?(:original_upcase)

  # SIMD-accelerated replacement for String#upcase.
  #
  # options - the same case-mapping options the built-in method accepts
  #           (for example :ascii or :turkic).
  #
  # Calls with options, and strings that are not pure ASCII, go straight to
  # the built-in implementation; the extension only handles the plain
  # ASCII-only case.
  #
  # Returns the upcased String.
  def upcase(*options)
    return original_upcase(*options) unless options.empty? && ascii_only?

    SIMDStringUpcase.upcase(self)
  end
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simd_string_upcase
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.8
5
+ platform: ruby
6
+ authors:
7
+ - Marian Posaceanu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-06-23 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake-compiler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.2'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark-ips
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: A SIMD-optimized replacement for Ruby's String#upcase method for x86_64
42
+ architectures
43
+ email: contact@marianposaceanu.com
44
+ executables: []
45
+ extensions:
46
+ - ext/simd_string_upcase/extconf.rb
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ext/simd_string_upcase/extconf.rb
50
+ - ext/simd_string_upcase/simd_string_upcase.c
51
+ - lib/simd_string_upcase.rb
52
+ homepage: https://github.com/marianposaceanu/simd_string_upcase
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message: SIMD String#upcase installed successfully!
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 2.0.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements:
71
+ - x86_64 architecture
72
+ rubygems_version: 3.5.3
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: SIMD-optimized string upcase for Ruby
76
+ test_files: []