table_string_replacer 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +23 -0
- data/LICENSE.txt +21 -0
- data/README.md +69 -0
- data/ext/table_string_replacer/extconf.rb +20 -0
- data/ext/table_string_replacer/table_string_replacer.c +318 -0
- data/lib/table_string_replacer/benchmarking.rb +78 -0
- data/lib/table_string_replacer/version.rb +3 -0
- data/lib/table_string_replacer.rb +31 -0
- metadata +56 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cb11a5cfd1ec0a7dec1a8131c2b5324552fceaf90df427b973793740fc85d385
|
4
|
+
data.tar.gz: bbf93bcb1e70775d92d24fa95a7d7f6427a86deb16c06ce1fcd57ac5f62e4890
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c88257041447a1c505760949f57163c12819097280aef4cf319dcb1288fe38613a6832531b1ff55370ca989e8fdbad610939871f6164bca5a0ddce0000615ddf
|
7
|
+
data.tar.gz: 19d525e54a7c6baa90c82fba83a72eedbbcce4b3cd92918010040e2ebb0d950c0cb97bf89ad1f38862d38fde96d2506f3404239d3699bb3051120428bcadd26b
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
|
+
|
8
|
+
## [0.2.0] - 2025-03-18
|
9
|
+
|
10
|
+
### Added
|
11
|
+
- Thread safety guarantees for all methods
|
12
|
+
- Benchmarking utility to compare with native Ruby
|
13
|
+
- Support for PHP serialized data with proper string length adjustment
|
14
|
+
|
15
|
+
### Fixed
|
16
|
+
- Memory leaks in the C extension
|
17
|
+
|
18
|
+
## [0.1.0] - 2023-03-01
|
19
|
+
|
20
|
+
### Added
|
21
|
+
- Initial release
|
22
|
+
- Basic batch replacement functionality
|
23
|
+
- C extension for performance
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2023 Your Name
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# TableStringReplacer
|
2
|
+
|
3
|
+
A C extension for Ruby that provides optimized string replacement operations, with special handling for PHP serialized strings (commonly used in WordPress).
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'table_string_replacer'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install table_string_replacer
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require 'table_string_replacer'
|
25
|
+
|
26
|
+
# Simple batch replacement
|
27
|
+
content = "Hello world, hello ruby"
|
28
|
+
replacements = [
|
29
|
+
["Hello", "Hi"],
|
30
|
+
["world", "everyone"],
|
31
|
+
["ruby", "Rails"]
|
32
|
+
]
|
33
|
+
|
34
|
+
# Perform all replacements in a single pass
|
35
|
+
result = TableStringReplacer.batch_replace(content, replacements)
|
36
|
+
# => "Hi everyone, hello Rails"
|
37
|
+
|
38
|
+
# For WordPress serialized data
|
39
|
+
serialized = 's:15:"http://old.com/";'
|
40
|
+
old_str = "http://old.com"
|
41
|
+
new_str = "https://new.com"
|
42
|
+
|
43
|
+
# This will handle the PHP serialized string length correctly
|
44
|
+
result = TableStringReplacer.serialized_str_replace(serialized, old_str, new_str)
|
45
|
+
# => 's:16:"https://new.com/";'
|
46
|
+
```
|
47
|
+
|
48
|
+
## Performance
|
49
|
+
|
50
|
+
This gem is especially useful for batch operations on large strings where you need to make multiple replacements in a single pass. The C extension is typically much faster than Ruby's native string replacement for large inputs.
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
# Check if the C extension is faster for your workload
|
54
|
+
TableStringReplacer.faster_than_ruby?(your_string, your_replacements)
|
55
|
+
```
|
56
|
+
|
57
|
+
## Development
|
58
|
+
|
59
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
60
|
+
|
61
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
62
|
+
|
63
|
+
## Contributing
|
64
|
+
|
65
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bv-ankit/table_string_replacer.
|
66
|
+
|
67
|
+
## License
|
68
|
+
|
69
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'mkmf'

# Probe the C library and headers used by the extension. The C source
# compiles its own strcasestr when HAVE_STRCASESTR is not defined.
have_func('strcasestr')
have_header('string.h')
have_header('stdlib.h')
have_header('ctype.h')

# Optimization flags, followed by the x86-only SSE4.2 flag and the threading
# flag, in the order they were historically appended.
wanted_flags = %w[-O3 -fPIC -fomit-frame-pointer -ffast-math -funroll-loops]
wanted_flags << '-msse4.2' if RUBY_PLATFORM =~ /x86_64|i386/
wanted_flags << '-pthread'

# These are GCC/Clang spellings. Probe each one so that a compiler which
# rejects a flag simply builds without it instead of failing the install.
wanted_flags.each do |flag|
  next if $CFLAGS.include?(flag)

  $CFLAGS << " #{flag}" if try_cflags(flag)
end

create_makefile('table_string_replacer/table_string_replacer')
|
@@ -0,0 +1,318 @@
|
|
1
|
+
#include <ruby.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
5
|
+
|
6
|
+
// Number of entries in the skip table of the fallback strcasestr() below:
// one slot per possible unsigned char value.
#define BM_BUFFER_SIZE 256

#ifndef HAVE_STRCASESTR
/*
 * Fallback case-insensitive substring search, compiled only when the
 * platform does not provide strcasestr().
 *
 * haystack, needle: NUL-terminated strings.
 * Returns a pointer to the first occurrence of needle in haystack, comparing
 * bytes through toupper(), or NULL when there is none. An empty needle
 * matches at the start of haystack.
 */
char *strcasestr(const char *haystack, const char *needle) {
    size_t needle_len = strlen(needle);
    size_t haystack_len = strlen(haystack);
    size_t skip[BM_BUFFER_SIZE];
    size_t i, j;

    // Both lengths are unsigned: handle these cases up front so that the
    // subtractions below can never wrap around.
    if (needle_len == 0)
        return (char *)haystack;
    if (needle_len > haystack_len)
        return NULL;

    // Default shift: the whole needle length.
    for (i = 0; i < BM_BUFFER_SIZE; i++)
        skip[i] = needle_len;

    // For every needle byte except the last, record its distance to the end.
    for (i = 0; i + 1 < needle_len; i++)
        skip[toupper((unsigned char)needle[i]) % BM_BUFFER_SIZE] = needle_len - i - 1;

    // Slide the window, shifting by the table entry of the window's last byte.
    i = 0;
    while (i <= haystack_len - needle_len) {
        for (j = 0; j < needle_len; j++) {
            if (toupper((unsigned char)haystack[i + j]) != toupper((unsigned char)needle[j]))
                break;
        }

        if (j == needle_len)
            return (char *)(haystack + i);

        i += skip[toupper((unsigned char)haystack[i + needle_len - 1]) % BM_BUFFER_SIZE];
    }

    return NULL;
}
#endif
|
39
|
+
|
40
|
+
// Fast serialized PHP string replacement with improved memory handling
|
41
|
+
static VALUE rb_serialized_str_replace(VALUE self, VALUE orig_str, VALUE old_str, VALUE new_str) {
|
42
|
+
// Ensure strings are properly initialized
|
43
|
+
Check_Type(orig_str, T_STRING);
|
44
|
+
Check_Type(old_str, T_STRING);
|
45
|
+
Check_Type(new_str, T_STRING);
|
46
|
+
|
47
|
+
char *orig = RSTRING_PTR(orig_str);
|
48
|
+
char *old = RSTRING_PTR(old_str);
|
49
|
+
char *new = RSTRING_PTR(new_str);
|
50
|
+
|
51
|
+
long orig_len = RSTRING_LEN(orig_str);
|
52
|
+
long old_len = RSTRING_LEN(old_str);
|
53
|
+
long new_len = RSTRING_LEN(new_str);
|
54
|
+
|
55
|
+
// Early optimization: if old and new are identical, return original
|
56
|
+
if (old_len == new_len && memcmp(old, new, old_len) == 0) {
|
57
|
+
return rb_str_dup(orig_str);
|
58
|
+
}
|
59
|
+
|
60
|
+
// Estimate result size more accurately to avoid reallocations
|
61
|
+
long max_replacements = orig_len / (old_len > 0 ? old_len : 1);
|
62
|
+
long size_diff = new_len - old_len;
|
63
|
+
long estimated_result_len = orig_len + (size_diff > 0 ? size_diff * max_replacements : 0) + 128;
|
64
|
+
|
65
|
+
// Pre-allocate result buffer
|
66
|
+
VALUE result = rb_str_new(NULL, estimated_result_len);
|
67
|
+
char *res_ptr = RSTRING_PTR(result);
|
68
|
+
long res_len = 0;
|
69
|
+
|
70
|
+
long i = 0;
|
71
|
+
while (i < orig_len) {
|
72
|
+
// Check for serialized string marker
|
73
|
+
if (i + 2 < orig_len && orig[i] == 's' && orig[i+1] == ':') {
|
74
|
+
char *endptr;
|
75
|
+
long len_pos = i + 2;
|
76
|
+
|
77
|
+
// Extract length value more safely
|
78
|
+
if (len_pos < orig_len) {
|
79
|
+
long len_val = strtol(orig + len_pos, &endptr, 10);
|
80
|
+
|
81
|
+
// Verify we found a valid PHP serialized string
|
82
|
+
if (endptr && *endptr == ':' && (endptr+1) < orig + orig_len && *(endptr+1) == '"') {
|
83
|
+
long content_start = (endptr + 2) - orig;
|
84
|
+
|
85
|
+
// Make sure content_start is within bounds
|
86
|
+
if (content_start < orig_len) {
|
87
|
+
// Only search within the actual serialized string content
|
88
|
+
long search_limit = content_start + len_val;
|
89
|
+
if (search_limit > orig_len) search_limit = orig_len;
|
90
|
+
|
91
|
+
char *found = strcasestr(orig + content_start, old);
|
92
|
+
|
93
|
+
// Found match within the serialized string content
|
94
|
+
if (found && found < orig + search_limit) {
|
95
|
+
// Verify we have enough space in result buffer (resize if needed)
|
96
|
+
long needed_len = res_len + (found - (orig + i)) + new_len + 100;
|
97
|
+
if (needed_len > estimated_result_len) {
|
98
|
+
rb_str_resize(result, needed_len * 2);
|
99
|
+
res_ptr = RSTRING_PTR(result);
|
100
|
+
estimated_result_len = needed_len * 2;
|
101
|
+
}
|
102
|
+
|
103
|
+
// Calculate new serialized string length
|
104
|
+
long new_len_val = len_val - old_len + new_len;
|
105
|
+
|
106
|
+
// Update the serialized string length indicator
|
107
|
+
char len_buf[32];
|
108
|
+
int len_digits = snprintf(len_buf, sizeof(len_buf), "s:%ld:", new_len_val);
|
109
|
+
|
110
|
+
// Copy prefix up to the 's:' marker
|
111
|
+
memcpy(res_ptr + res_len, orig + i, 2);
|
112
|
+
res_len += 2;
|
113
|
+
|
114
|
+
// Copy new length
|
115
|
+
memcpy(res_ptr + res_len, len_buf + 2, len_digits - 2);
|
116
|
+
res_len += len_digits - 2;
|
117
|
+
|
118
|
+
// Copy from length end to the found match position
|
119
|
+
long pre_len = found - (orig + content_start);
|
120
|
+
memcpy(res_ptr + res_len, endptr, pre_len + 2);
|
121
|
+
res_len += pre_len + 2;
|
122
|
+
|
123
|
+
// Copy the new replacement string
|
124
|
+
memcpy(res_ptr + res_len, new, new_len);
|
125
|
+
res_len += new_len;
|
126
|
+
|
127
|
+
// Skip to after the replacement point
|
128
|
+
i = found - orig + old_len;
|
129
|
+
continue;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
// If we didn't perform a replacement, copy the current character
|
137
|
+
if (res_len >= estimated_result_len) {
|
138
|
+
rb_str_resize(result, estimated_result_len * 2);
|
139
|
+
res_ptr = RSTRING_PTR(result);
|
140
|
+
estimated_result_len *= 2;
|
141
|
+
}
|
142
|
+
res_ptr[res_len++] = orig[i++];
|
143
|
+
}
|
144
|
+
|
145
|
+
// Set final string length and terminate
|
146
|
+
rb_str_resize(result, res_len);
|
147
|
+
|
148
|
+
return result;
|
149
|
+
}
|
150
|
+
|
151
|
+
// Optimized batch string replacement
|
152
|
+
static VALUE rb_batch_replace(VALUE self, VALUE orig_str, VALUE replacements) {
|
153
|
+
Check_Type(orig_str, T_STRING);
|
154
|
+
Check_Type(replacements, T_ARRAY);
|
155
|
+
|
156
|
+
long replacements_count = RARRAY_LEN(replacements);
|
157
|
+
if (replacements_count == 0) {
|
158
|
+
return rb_str_dup(orig_str);
|
159
|
+
}
|
160
|
+
|
161
|
+
char *orig = RSTRING_PTR(orig_str);
|
162
|
+
long orig_len = RSTRING_LEN(orig_str);
|
163
|
+
|
164
|
+
// For small number of replacements, use a simpler algorithm
|
165
|
+
if (replacements_count <= 3) {
|
166
|
+
VALUE result = rb_str_dup(orig_str);
|
167
|
+
|
168
|
+
for (long i = 0; i < replacements_count; i++) {
|
169
|
+
VALUE pair = rb_ary_entry(replacements, i);
|
170
|
+
VALUE old_val = rb_ary_entry(pair, 0);
|
171
|
+
VALUE new_val = rb_ary_entry(pair, 1);
|
172
|
+
|
173
|
+
// Use Ruby's gsub! for simplicity with small numbers of replacements
|
174
|
+
result = rb_funcall(result, rb_intern("gsub!"), 2, old_val, new_val);
|
175
|
+
if (NIL_P(result)) {
|
176
|
+
// If no replacements were made, restore original
|
177
|
+
result = rb_str_dup(orig_str);
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
return result;
|
182
|
+
}
|
183
|
+
|
184
|
+
// For larger numbers of replacements, use a more efficient algorithm
|
185
|
+
// Calculate maximum possible result length
|
186
|
+
long max_result_len = orig_len;
|
187
|
+
for (long i = 0; i < replacements_count; i++) {
|
188
|
+
VALUE pair = rb_ary_entry(replacements, i);
|
189
|
+
VALUE old_str = rb_ary_entry(pair, 0);
|
190
|
+
VALUE new_str = rb_ary_entry(pair, 1);
|
191
|
+
|
192
|
+
long old_len = RSTRING_LEN(old_str);
|
193
|
+
long new_len = RSTRING_LEN(new_str);
|
194
|
+
|
195
|
+
if (new_len > old_len) {
|
196
|
+
// Add extra space for each potential replacement
|
197
|
+
long potential_replacements = old_len > 0 ? orig_len / old_len : 0;
|
198
|
+
max_result_len += potential_replacements * (new_len - old_len);
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
// Allocate result buffer with extra room
|
203
|
+
VALUE result = rb_str_new(NULL, max_result_len + 1024);
|
204
|
+
char *res_ptr = RSTRING_PTR(result);
|
205
|
+
long res_len = 0;
|
206
|
+
|
207
|
+
// Store pattern data for faster lookup
|
208
|
+
char **patterns = ALLOCA_N(char*, replacements_count);
|
209
|
+
char **replacements_ptr = ALLOCA_N(char*, replacements_count);
|
210
|
+
long *pattern_lens = ALLOCA_N(long, replacements_count);
|
211
|
+
long *replacement_lens = ALLOCA_N(long, replacements_count);
|
212
|
+
|
213
|
+
for (long i = 0; i < replacements_count; i++) {
|
214
|
+
VALUE pair = rb_ary_entry(replacements, i);
|
215
|
+
VALUE old_str = rb_ary_entry(pair, 0);
|
216
|
+
VALUE new_str = rb_ary_entry(pair, 1);
|
217
|
+
|
218
|
+
patterns[i] = RSTRING_PTR(old_str);
|
219
|
+
pattern_lens[i] = RSTRING_LEN(old_str);
|
220
|
+
replacements_ptr[i] = RSTRING_PTR(new_str);
|
221
|
+
replacement_lens[i] = RSTRING_LEN(new_str);
|
222
|
+
}
|
223
|
+
|
224
|
+
// Process the string in a single pass
|
225
|
+
long i = 0;
|
226
|
+
while (i < orig_len) {
|
227
|
+
int matched = 0;
|
228
|
+
|
229
|
+
// Find longest matching pattern at current position
|
230
|
+
long best_match_len = 0;
|
231
|
+
long best_match_idx = -1;
|
232
|
+
|
233
|
+
for (long j = 0; j < replacements_count; j++) {
|
234
|
+
long pattern_len = pattern_lens[j];
|
235
|
+
|
236
|
+
// Skip if we don't have enough characters left
|
237
|
+
if (i + pattern_len > orig_len || pattern_len <= 0) continue;
|
238
|
+
|
239
|
+
// Only consider if this would be a longer match
|
240
|
+
if (pattern_len > best_match_len) {
|
241
|
+
// Check if this pattern matches at current position
|
242
|
+
if (memcmp(orig + i, patterns[j], pattern_len) == 0) {
|
243
|
+
best_match_len = pattern_len;
|
244
|
+
best_match_idx = j;
|
245
|
+
matched = 1;
|
246
|
+
}
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
if (matched) {
|
251
|
+
// Apply the best match
|
252
|
+
long replace_len = replacement_lens[best_match_idx];
|
253
|
+
|
254
|
+
// Ensure we have room in the result buffer
|
255
|
+
if (res_len + replace_len >= max_result_len) {
|
256
|
+
max_result_len *= 2;
|
257
|
+
rb_str_resize(result, max_result_len);
|
258
|
+
res_ptr = RSTRING_PTR(result);
|
259
|
+
}
|
260
|
+
|
261
|
+
// Copy replacement string
|
262
|
+
memcpy(res_ptr + res_len, replacements_ptr[best_match_idx], replace_len);
|
263
|
+
res_len += replace_len;
|
264
|
+
|
265
|
+
// Advance past the matched pattern
|
266
|
+
i += best_match_len;
|
267
|
+
} else {
|
268
|
+
// No match, copy the current character
|
269
|
+
if (res_len >= max_result_len) {
|
270
|
+
max_result_len *= 2;
|
271
|
+
rb_str_resize(result, max_result_len);
|
272
|
+
res_ptr = RSTRING_PTR(result);
|
273
|
+
}
|
274
|
+
res_ptr[res_len++] = orig[i++];
|
275
|
+
}
|
276
|
+
}
|
277
|
+
|
278
|
+
// Resize to actual length
|
279
|
+
rb_str_resize(result, res_len);
|
280
|
+
|
281
|
+
return result;
|
282
|
+
}
|
283
|
+
|
284
|
+
// Benchmark function to measure performance
|
285
|
+
static VALUE rb_benchmark(VALUE self, VALUE iterations, VALUE orig_str, VALUE replacements) {
|
286
|
+
Check_Type(iterations, T_FIXNUM);
|
287
|
+
Check_Type(orig_str, T_STRING);
|
288
|
+
Check_Type(replacements, T_ARRAY);
|
289
|
+
|
290
|
+
long iter = FIX2LONG(iterations);
|
291
|
+
VALUE result = Qnil;
|
292
|
+
|
293
|
+
for (long i = 0; i < iter; i++) {
|
294
|
+
// Free the previous result to avoid memory buildup
|
295
|
+
if (!NIL_P(result)) {
|
296
|
+
result = Qnil;
|
297
|
+
}
|
298
|
+
result = rb_batch_replace(self, orig_str, replacements);
|
299
|
+
}
|
300
|
+
|
301
|
+
return result;
|
302
|
+
}
|
303
|
+
|
304
|
+
// Module initialization
|
305
|
+
void Init_table_string_replacer() {
|
306
|
+
VALUE mStringReplacer = rb_define_module("TableStringReplacer");
|
307
|
+
|
308
|
+
// Core functionality
|
309
|
+
rb_define_singleton_method(mStringReplacer, "serialized_str_replace", rb_serialized_str_replace, 3);
|
310
|
+
rb_define_singleton_method(mStringReplacer, "batch_replace", rb_batch_replace, 2);
|
311
|
+
|
312
|
+
// Benchmarking utility
|
313
|
+
rb_define_singleton_method(mStringReplacer, "benchmark", rb_benchmark, 3);
|
314
|
+
|
315
|
+
// Constants for thread safety documentation
|
316
|
+
rb_define_const(mStringReplacer, "THREAD_SAFE", Qtrue);
|
317
|
+
rb_define_const(mStringReplacer, "VERSION", rb_str_new_cstr(RSTRING_PTR(rb_const_get(mStringReplacer, rb_intern("VERSION")))));
|
318
|
+
}
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module TableStringReplacer
  # Benchmarking utilities for TableStringReplacer
  module Benchmarking
    # Compares performance between this gem and Ruby's native string replacement.
    #
    # Every iteration starts again from the unmodified +str+ on both sides, so
    # each run does the same amount of work and the input never grows from one
    # iteration to the next. +str+ itself is not modified.
    #
    # @param str [String] Input string to test against
    # @param replacements [Array<Array<String, String>>] Array of [old, new] pairs
    # @param iterations [Integer] Number of times to run the test
    # @return [Hash] +:ruby+ and +:c_extension+ hold the Benchmark measurements,
    #   +:speedup+ is the Ruby wall-clock time divided by the C extension's
    def self.compare_with_ruby(str, replacements, iterations = 1000)
      require 'benchmark'

      results = {}

      # Native Ruby: apply the pairs one after another with String#gsub.
      results[:ruby] = Benchmark.measure do
        iterations.times do
          replacements.reduce(str) { |text, (old, new)| text.gsub(old, new) }
        end
      end

      results[:c_extension] = Benchmark.measure do
        iterations.times { TableStringReplacer.batch_replace(str, replacements) }
      end

      results[:speedup] = results[:ruby].real / results[:c_extension].real

      results
    end

    # Tests thread safety by running multiple threads simultaneously and
    # checking that every thread ends up with the expected replacement result.
    #
    # @param threads [Integer] Number of threads to run concurrently
    # @param iterations [Integer] Number of replacements per thread
    # @return [true, Hash] true if all threads completed with the expected
    #   result, otherwise <tt>{ success: false, errors: [...] }</tt>
    def self.test_thread_safety(threads = 10, iterations = 1000)
      replacements = [%w[old1 new1], %w[old2 new2], %w[old3 new3]]
      str = 'This is old1 test string with old2 and old3 values'
      expected = 'This is new1 test string with new2 and new3 values'

      # Each thread hands back its last result, or the error it ran into,
      # through Thread#value, so no array is shared between threads.
      workers = Array.new(threads) do
        Thread.new do
          begin
            last = nil
            iterations.times { last = TableStringReplacer.batch_replace(str, replacements) }
            last
          rescue StandardError => e
            e
          end
        end
      end

      outcomes = workers.map(&:value)
      errors = outcomes.grep(StandardError)
      results = outcomes.reject { |outcome| outcome.is_a?(StandardError) }

      success = errors.empty? && results.all? { |result| result == expected }
      success ? true : { success: false, errors: errors }
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require "table_string_replacer/version"
|
2
|
+
# Load the compiled C extension
|
3
|
+
require "table_string_replacer/table_string_replacer"
|
4
|
+
|
5
|
+
module TableStringReplacer
  class Error < StandardError; end

  # This module provides high-performance string replacements
  # optimized for WordPress serialized data.
  #
  # All methods are thread-safe and can be called concurrently
  # from multiple threads without any issues.

  # Returns true if a benchmark shows this gem is faster than
  # native Ruby string replacement for your workload.
  #
  # Both sides apply every pair in +replacements+ to +str+ 100 times and are
  # timed with the monotonic clock, so the two timings cover the same work.
  #
  # @param str [String] Sample input string
  # @param replacements [Array<Array<String, String>>] Array of [old, new] pairs
  # @return [Boolean] true if the C extension is faster
  def self.faster_than_ruby?(str, replacements)
    ruby_time = time_runs do
      replacements.reduce(str) { |text, (old, new)| text.gsub(old, new) }
    end

    c_time = time_runs { TableStringReplacer.batch_replace(str, replacements) }

    c_time < ruby_time
  end

  # Runs the given block +runs+ times and returns the elapsed seconds.
  def self.time_runs(runs = 100)
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    runs.times { yield }
    Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  end
  private_class_method :time_runs
end
|
metadata
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: table_string_replacer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ANKIT KHANDELWAL
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-03-18 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A C extension that provides optimized string replacement, specially handling
|
14
|
+
PHP serialized strings
|
15
|
+
email:
|
16
|
+
- bv-ankit@blogvault.net
|
17
|
+
executables: []
|
18
|
+
extensions:
|
19
|
+
- ext/table_string_replacer/extconf.rb
|
20
|
+
extra_rdoc_files: []
|
21
|
+
files:
|
22
|
+
- CHANGELOG.md
|
23
|
+
- LICENSE.txt
|
24
|
+
- README.md
|
25
|
+
- ext/table_string_replacer/extconf.rb
|
26
|
+
- ext/table_string_replacer/table_string_replacer.c
|
27
|
+
- lib/table_string_replacer.rb
|
28
|
+
- lib/table_string_replacer/benchmarking.rb
|
29
|
+
- lib/table_string_replacer/version.rb
|
30
|
+
homepage: https://github.com/bv-ankit/table_string_replacer
|
31
|
+
licenses:
|
32
|
+
- MIT
|
33
|
+
metadata:
|
34
|
+
homepage_uri: https://github.com/bv-ankit/table_string_replacer
|
35
|
+
source_code_uri: https://github.com/bv-ankit/table_string_replacer
|
36
|
+
changelog_uri: https://github.com/bv-ankit/table_string_replacer/blob/master/CHANGELOG.md
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.5.0
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubygems_version: 3.5.18
|
53
|
+
signing_key:
|
54
|
+
specification_version: 4
|
55
|
+
summary: Fast string replacement operations with PHP serialization support
|
56
|
+
test_files: []
|