table_string_replacer 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -1
- data/ext/table_string_replacer/table_string_replacer.c +209 -49
- data/lib/table_string_replacer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d215391ea5818f845c778f73cc12cffad6ebba54d23a47fd6c465d13111ff61
|
4
|
+
data.tar.gz: deab5736893ab931455a4b18e8534af239ae7ee300fc2454dd6ad014f0dd56af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1fb3e48330c7f87f78acb625e7fe0f29b8c8a64c7da79970fc16c16c4c75123352a9091de8604c2a796780d6e6c08e98e5e3ac278c992b54c99d326002c10ec0
|
7
|
+
data.tar.gz: 6d7a709a91cbeacce28c23ba30301c9d580f92bd0014189d0626a62eb3bd49eca205b8f99857f7f1a1350610587668b02b31158d62071ea789830cd0ba7e08d1
|
data/README.md
CHANGED
@@ -60,9 +60,24 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
60
60
|
|
61
61
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
62
62
|
|
63
|
+
## Updating the Gem
|
64
|
+
|
65
|
+
1. Make your code changes and ensure all tests pass with `rake test`
|
66
|
+
2. Update the version number in `lib/table_string_replacer/version.rb` following [Semantic Versioning](https://semver.org/) principles
|
67
|
+
3. Update the CHANGELOG.md file with your changes
|
68
|
+
4. Commit your changes to git
|
69
|
+
5. Run `bundle exec rake release`, which will:
|
70
|
+
- Create a git tag for the version
|
71
|
+
- Push git commits and tags
|
72
|
+
- Build the gem and push it to RubyGems.org
|
73
|
+
|
74
|
+
Alternatively, you can build and publish the gem manually (note that this does not create or push the git tag):
|
75
|
+
1. Build the gem: `gem build table_string_replacer.gemspec`
|
76
|
+
2. Push to RubyGems: `gem push table_string_replacer-x.x.x.gem` (where x.x.x is the version number)
|
77
|
+
|
63
78
|
## Contributing
|
64
79
|
|
65
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
80
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/bv-ankit/table_string_replacer.
|
66
81
|
|
67
82
|
## License
|
68
83
|
|
@@ -37,7 +37,56 @@ char *strcasestr(const char *haystack, const char *needle) {
|
|
37
37
|
}
|
38
38
|
#endif
|
39
39
|
|
40
|
-
//
|
40
|
+
// Case-insensitive substring search over explicit-length buffers.
//
// Both buffers are treated as raw bytes with caller-supplied lengths, so an
// embedded NUL byte is compared like any other byte instead of ending the scan.
//
//   haystack / haystack_len - buffer to scan and its length in bytes
//   needle   / needle_len   - pattern to look for and its length in bytes
//
// Returns a pointer to the first match inside haystack, haystack itself when
// the needle is empty, or NULL when the needle does not occur.
static char* binary_strcasestr(const char* haystack, long haystack_len, const char* needle, long needle_len) {
    if (needle_len == 0) {
        return (char*)haystack;
    }
    if (needle_len > haystack_len) {
        return NULL;
    }

    // Last offset at which the whole needle still fits inside the haystack.
    const long last_start = haystack_len - needle_len;
    long start = 0;

    while (start <= last_start) {
        long matched = 0;

        // Bytes are widened through unsigned char before toupper() so values
        // above 0x7F are never handed to <ctype.h> as negative ints.
        while (matched < needle_len &&
               toupper((unsigned char)haystack[start + matched]) ==
               toupper((unsigned char)needle[matched])) {
            matched++;
        }

        if (matched == needle_len) {
            return (char*)(haystack + start);
        }
        start++;
    }

    return NULL;
}
|
59
|
+
|
60
|
+
// Parse the decimal length field of a PHP-serialized string ("s:N:...").
//
//   str       - buffer being parsed; may contain arbitrary (binary) bytes
//   max_len   - number of readable bytes in str
//   start_pos - offset at which to start looking for the length digits
//   endptr    - out: set to the ':' that terminates the digit run on success,
//               or to NULL when no valid length field was found
//
// Returns the parsed length on success. Returns 0 with *endptr == NULL when
// start_pos is negative, when fewer than two bytes remain at start_pos, when
// the digit run is not followed by ':' inside the buffer, or when the number
// does not fit in a long. Callers must test *endptr rather than the return
// value, because 0 is also a legitimate length.
static long extract_serialized_len(const char* str, long max_len, long start_pos, char** endptr) {
    // Largest value of long, computed locally so this helper needs no extra header.
    const long long_max = (long)(~0UL >> 1);

    *endptr = NULL;

    // Need room for at least one digit plus the terminating colon.
    if (start_pos < 0 || start_pos + 1 >= max_len) {
        return 0;
    }

    // Skip forward to the first digit.
    // NOTE(review): this skips *any* non-digit bytes, not just whitespace, so
    // malformed headers such as "s:x5:" are still accepted - confirm that this
    // leniency is intended before tightening it.
    long pos = start_pos;
    while (pos < max_len && !isdigit((unsigned char)str[pos])) {
        pos++;
    }

    // Accumulate digits. The cast keeps bytes >= 0x80 from reaching isdigit()
    // as negative ints, and the bound check rejects values that would
    // overflow a signed long instead of invoking undefined behaviour.
    long val = 0;
    while (pos < max_len && isdigit((unsigned char)str[pos])) {
        int digit = str[pos] - '0';
        if (val > (long_max - digit) / 10) {
            return 0;
        }
        val = val * 10 + digit;
        pos++;
    }

    // A valid length field must be terminated by a colon inside the buffer.
    if (pos < max_len && str[pos] == ':') {
        *endptr = (char*)(str + pos);
        return val;
    }

    return 0;
}
|
88
|
+
|
89
|
+
// Fast serialized PHP string replacement with improved memory handling and binary string support
|
41
90
|
static VALUE rb_serialized_str_replace(VALUE self, VALUE orig_str, VALUE old_str, VALUE new_str) {
|
42
91
|
// Ensure strings are properly initialized
|
43
92
|
Check_Type(orig_str, T_STRING);
|
@@ -52,97 +101,208 @@ static VALUE rb_serialized_str_replace(VALUE self, VALUE orig_str, VALUE old_str
|
|
52
101
|
long old_len = RSTRING_LEN(old_str);
|
53
102
|
long new_len = RSTRING_LEN(new_str);
|
54
103
|
|
104
|
+
// Early optimization: if old string is empty or original is empty, return original
|
105
|
+
if (old_len == 0 || orig_len == 0) {
|
106
|
+
return rb_str_dup(orig_str);
|
107
|
+
}
|
108
|
+
|
55
109
|
// Early optimization: if old and new are identical, return original
|
56
110
|
if (old_len == new_len && memcmp(old, new, old_len) == 0) {
|
57
111
|
return rb_str_dup(orig_str);
|
58
112
|
}
|
59
113
|
|
60
|
-
//
|
61
|
-
long
|
114
|
+
// Pre-compute a more accurate size estimate by counting potential matches first
|
115
|
+
long count = 0;
|
116
|
+
long i = 0;
|
117
|
+
|
118
|
+
// First-pass to count potential replacements in serialized strings
|
119
|
+
while (i < orig_len) {
|
120
|
+
// Look for serialized string marker pattern 's:'
|
121
|
+
if (i + 2 < orig_len && orig[i] == 's' && orig[i+1] == ':') {
|
122
|
+
char *endptr;
|
123
|
+
long len_val = extract_serialized_len(orig, orig_len, i+2, &endptr);
|
124
|
+
|
125
|
+
// Valid PHP serialized string format: s:N:"content";
|
126
|
+
if (endptr && (endptr+1) < orig + orig_len && *(endptr+1) == '"') {
|
127
|
+
long content_start = (endptr + 2) - orig;
|
128
|
+
|
129
|
+
if (content_start < orig_len) {
|
130
|
+
// Only search within the actual serialized string content
|
131
|
+
long search_limit = content_start + len_val;
|
132
|
+
if (search_limit > orig_len) search_limit = orig_len;
|
133
|
+
|
134
|
+
// Count occurrences within this serialized string
|
135
|
+
char *pos = orig + content_start;
|
136
|
+
char *end = orig + search_limit;
|
137
|
+
long remaining = search_limit - content_start;
|
138
|
+
|
139
|
+
while (remaining >= old_len) {
|
140
|
+
char *found = binary_strcasestr(pos, remaining, old, old_len);
|
141
|
+
if (found && found < end) {
|
142
|
+
count++;
|
143
|
+
long advance = found - pos + old_len;
|
144
|
+
pos += advance;
|
145
|
+
remaining -= advance;
|
146
|
+
} else {
|
147
|
+
break;
|
148
|
+
}
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
// Skip past this serialized string entirely for the counting phase
|
153
|
+
i = content_start + len_val;
|
154
|
+
// Skip past closing quote and semicolon if present
|
155
|
+
if (i < orig_len && orig[i] == '"') i++;
|
156
|
+
if (i < orig_len && orig[i] == ';') i++;
|
157
|
+
continue;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
i++;
|
161
|
+
}
|
162
|
+
|
163
|
+
// Optimize allocation with more precise size calculation
|
62
164
|
long size_diff = new_len - old_len;
|
63
|
-
long estimated_result_len = orig_len + (size_diff > 0 ? size_diff *
|
165
|
+
long estimated_result_len = orig_len + (size_diff > 0 ? size_diff * count : 0) + 256;
|
64
166
|
|
65
|
-
// Pre-allocate result buffer
|
167
|
+
// Pre-allocate result buffer - slightly oversized to minimize reallocations
|
66
168
|
VALUE result = rb_str_new(NULL, estimated_result_len);
|
67
169
|
char *res_ptr = RSTRING_PTR(result);
|
68
170
|
long res_len = 0;
|
69
171
|
|
70
|
-
|
172
|
+
// Cache the first character of the search string for faster initial checks
|
173
|
+
unsigned char first_char = (unsigned char)old[0];
|
174
|
+
unsigned char first_char_upper = toupper(first_char);
|
175
|
+
unsigned char first_char_lower = tolower(first_char);
|
176
|
+
|
177
|
+
// Second pass to perform the actual replacements
|
178
|
+
i = 0;
|
71
179
|
while (i < orig_len) {
|
72
|
-
//
|
180
|
+
// Look for serialized string marker pattern 's:'
|
73
181
|
if (i + 2 < orig_len && orig[i] == 's' && orig[i+1] == ':') {
|
74
182
|
char *endptr;
|
75
|
-
long
|
183
|
+
long len_val = extract_serialized_len(orig, orig_len, i+2, &endptr);
|
76
184
|
|
77
|
-
//
|
78
|
-
if (
|
79
|
-
long
|
185
|
+
// Valid PHP serialized string format: s:N:"content";
|
186
|
+
if (endptr && (endptr+1) < orig + orig_len && *(endptr+1) == '"') {
|
187
|
+
long content_start = (endptr + 2) - orig;
|
80
188
|
|
81
|
-
|
82
|
-
|
83
|
-
long
|
189
|
+
if (content_start < orig_len) {
|
190
|
+
// Only search within the actual serialized string content
|
191
|
+
long search_limit = content_start + len_val;
|
192
|
+
if (search_limit > orig_len) search_limit = orig_len;
|
84
193
|
|
85
|
-
//
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
194
|
+
// Fast path: check if this serialized string might contain our pattern
|
195
|
+
// by checking for the first character before doing a full search
|
196
|
+
int potential_match = 0;
|
197
|
+
for (long scan_pos = content_start; scan_pos < search_limit; scan_pos++) {
|
198
|
+
unsigned char c = (unsigned char)orig[scan_pos];
|
199
|
+
if (c == first_char || toupper(c) == first_char_upper ||
|
200
|
+
tolower(c) == first_char_lower) {
|
201
|
+
potential_match = 1;
|
202
|
+
break;
|
203
|
+
}
|
204
|
+
}
|
205
|
+
|
206
|
+
// Only do full search if potential match found
|
207
|
+
if (potential_match) {
|
208
|
+
char *pos = orig + content_start;
|
209
|
+
char *end = orig + search_limit;
|
210
|
+
long remaining = search_limit - content_start;
|
90
211
|
|
91
|
-
char *found =
|
212
|
+
char *found = binary_strcasestr(pos, remaining, old, old_len);
|
92
213
|
|
93
214
|
// Found match within the serialized string content
|
94
|
-
if (found && found <
|
95
|
-
//
|
96
|
-
long
|
97
|
-
|
98
|
-
rb_str_resize(result, needed_len * 2);
|
99
|
-
res_ptr = RSTRING_PTR(result);
|
100
|
-
estimated_result_len = needed_len * 2;
|
101
|
-
}
|
102
|
-
|
215
|
+
if (found && found < end) {
|
216
|
+
// Before applying replacement, ensure we have enough space
|
217
|
+
long prefix_len = found - pos;
|
218
|
+
|
103
219
|
// Calculate new serialized string length
|
104
|
-
long
|
220
|
+
long modified_len = len_val + (new_len - old_len);
|
105
221
|
|
106
|
-
//
|
222
|
+
// Build the new serialized string header
|
107
223
|
char len_buf[32];
|
108
|
-
int len_digits = snprintf(len_buf, sizeof(len_buf), "s:%ld:",
|
224
|
+
int len_digits = snprintf(len_buf, sizeof(len_buf), "s:%ld:", modified_len);
|
109
225
|
|
110
|
-
//
|
111
|
-
|
112
|
-
|
226
|
+
// Ensure we have space in result buffer
|
227
|
+
long needed_space = res_len + len_digits + prefix_len + new_len +
|
228
|
+
(search_limit - (found + old_len)) + 16;
|
229
|
+
if (needed_space > estimated_result_len) {
|
230
|
+
estimated_result_len = (estimated_result_len * 3) / 2;
|
231
|
+
if (estimated_result_len < needed_space)
|
232
|
+
estimated_result_len = needed_space + 256;
|
233
|
+
rb_str_resize(result, estimated_result_len);
|
234
|
+
res_ptr = RSTRING_PTR(result);
|
235
|
+
}
|
113
236
|
|
114
|
-
// Copy
|
115
|
-
memcpy(res_ptr + res_len, len_buf
|
116
|
-
res_len += len_digits
|
237
|
+
// Copy the serialized string header
|
238
|
+
memcpy(res_ptr + res_len, len_buf, len_digits);
|
239
|
+
res_len += len_digits;
|
117
240
|
|
118
|
-
// Copy
|
119
|
-
|
120
|
-
memcpy(res_ptr + res_len,
|
121
|
-
res_len +=
|
241
|
+
// Copy quote and prefix content
|
242
|
+
res_ptr[res_len++] = '"';
|
243
|
+
memcpy(res_ptr + res_len, pos, prefix_len);
|
244
|
+
res_len += prefix_len;
|
122
245
|
|
123
|
-
// Copy the
|
246
|
+
// Copy the replacement string
|
124
247
|
memcpy(res_ptr + res_len, new, new_len);
|
125
248
|
res_len += new_len;
|
126
249
|
|
127
|
-
//
|
128
|
-
|
250
|
+
// Copy remainder of serialized string
|
251
|
+
long suffix_len = search_limit - (found + old_len);
|
252
|
+
if (suffix_len > 0) {
|
253
|
+
memcpy(res_ptr + res_len, found + old_len, suffix_len);
|
254
|
+
res_len += suffix_len;
|
255
|
+
}
|
256
|
+
|
257
|
+
// Add closing quote and semicolon
|
258
|
+
res_ptr[res_len++] = '"';
|
259
|
+
res_ptr[res_len++] = ';';
|
260
|
+
|
261
|
+
// Skip past the entire processed serialized string
|
262
|
+
i = search_limit;
|
263
|
+
// Skip past closing quote and semicolon if present
|
264
|
+
if (i < orig_len && orig[i] == '"') i++;
|
265
|
+
if (i < orig_len && orig[i] == ';') i++;
|
129
266
|
continue;
|
130
267
|
}
|
131
268
|
}
|
269
|
+
|
270
|
+
// No match found, copy serialized string as is
|
271
|
+
long str_total_len;
|
272
|
+
long end_pos = search_limit;
|
273
|
+
|
274
|
+
// Find the end of the serialized string (should be quote+semicolon)
|
275
|
+
if (end_pos < orig_len && orig[end_pos] == '"') end_pos++;
|
276
|
+
if (end_pos < orig_len && orig[end_pos] == ';') end_pos++;
|
277
|
+
|
278
|
+
str_total_len = end_pos - i;
|
279
|
+
|
280
|
+
if (res_len + str_total_len > estimated_result_len) {
|
281
|
+
estimated_result_len = (estimated_result_len * 3) / 2;
|
282
|
+
if (estimated_result_len < res_len + str_total_len)
|
283
|
+
estimated_result_len = res_len + str_total_len + 256;
|
284
|
+
rb_str_resize(result, estimated_result_len);
|
285
|
+
res_ptr = RSTRING_PTR(result);
|
286
|
+
}
|
287
|
+
|
288
|
+
memcpy(res_ptr + res_len, orig + i, str_total_len);
|
289
|
+
res_len += str_total_len;
|
290
|
+
i += str_total_len;
|
291
|
+
continue;
|
132
292
|
}
|
133
293
|
}
|
134
294
|
}
|
135
295
|
|
136
296
|
// If we didn't perform a replacement, copy the current character
|
137
297
|
if (res_len >= estimated_result_len) {
|
138
|
-
|
298
|
+
estimated_result_len = (estimated_result_len * 3) / 2;
|
299
|
+
rb_str_resize(result, estimated_result_len);
|
139
300
|
res_ptr = RSTRING_PTR(result);
|
140
|
-
estimated_result_len *= 2;
|
141
301
|
}
|
142
302
|
res_ptr[res_len++] = orig[i++];
|
143
303
|
}
|
144
304
|
|
145
|
-
// Set final string length
|
305
|
+
// Set final string length
|
146
306
|
rb_str_resize(result, res_len);
|
147
307
|
|
148
308
|
return result;
|
@@ -315,4 +475,4 @@ void Init_table_string_replacer() {
|
|
315
475
|
// Constants for thread safety documentation
|
316
476
|
rb_define_const(mStringReplacer, "THREAD_SAFE", Qtrue);
|
317
477
|
rb_define_const(mStringReplacer, "VERSION", rb_str_new_cstr(RSTRING_PTR(rb_const_get(mStringReplacer, rb_intern("VERSION")))));
|
318
|
-
}
|
478
|
+
}
|