string-scrub 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9eb9dbb53755095ddf297dcadd31df955a8d40aa
4
+ data.tar.gz: ff3692ef538237cfe5cd8da80136744b933102e5
5
+ SHA512:
6
+ metadata.gz: 367c9b6389befefdf01757bed6b0cf8e6e6100155a0a5ca886299be886391f78d48765b7d3e6bd13555805d620a51d57e643e1e2fc735297492be5c03b6bffa2
7
+ data.tar.gz: c76f4019b64dcdd6794124015351b6109ffd1fb247a5b36ea4e9aaef4fa5c6c58732a47f2c01ddf68c3b0aefc6cbb8d28964e4c57815c76f06317127b95abf52
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ test/tmp
15
+ test/version_tmp
16
+ tmp
17
+ ext/string/*.o
18
+ ext/string/Makefile
19
+ ext/string/*.bundle
20
+ ext/string/*.so
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in string-scrub.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 SHIBATA Hiroshi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # String::Scrub
2
+
3
+ String#scrub for Ruby 2.0.0
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'string-scrub'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install string-scrub
18
+
19
+ ## Usage
20
+
21
+ See the [test cases](https://github.com/hsbt/string-scrub/blob/master/test/test_scrub.rb) for usage examples.
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,2 @@
1
+ require 'mkmf' # mkmf supplies create_makefile, used below
2
+ create_makefile('string/scrub') # emit the Makefile for the extension target 'string/scrub'
@@ -0,0 +1,366 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+
4
+ #ifndef TRUE
5
+ #define TRUE 1
6
+ #endif
7
+ #ifndef FALSE
8
+ #define FALSE 0
9
+ #endif
10
+
11
+ #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
12
+ /* Returns a pointer to the first byte in [p, e) that fails ISASCII(), or NULL if every byte passes. */
13
+ static inline const char *
14
+ search_nonascii(const char *p, const char *e)
15
+ {
16
+ #if SIZEOF_VALUE == 8
17
+ # define NONASCII_MASK 0x8080808080808080ULL /* 0x80 in each of the 8 bytes of a VALUE */
18
+ #elif SIZEOF_VALUE == 4
19
+ # define NONASCII_MASK 0x80808080UL /* 0x80 in each of the 4 bytes of a VALUE */
20
+ #endif
21
+ #ifdef NONASCII_MASK
22
+ if ((int)sizeof(VALUE) * 2 < e - p) { /* word-at-a-time scan only when more than two VALUE widths remain */
23
+ const VALUE *s, *t;
24
+ const VALUE lowbits = sizeof(VALUE) - 1;
25
+ s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits)); /* p rounded up to a multiple of sizeof(VALUE) */
26
+ while (p < (const char *)s) { /* check the leading unaligned bytes one by one */
27
+ if (!ISASCII(*p))
28
+ return p;
29
+ p++;
30
+ }
31
+ t = (const VALUE*)(~lowbits & (VALUE)e); /* e rounded down to a multiple of sizeof(VALUE) */
32
+ while (s < t) { /* test sizeof(VALUE) bytes per iteration */
33
+ if (*s & NONASCII_MASK) { /* at least one byte of this word has its 0x80 bit set */
34
+ t = s; /* restart the byte-wise scan at this word to pinpoint the byte */
35
+ break;
36
+ }
37
+ s++;
38
+ }
39
+ p = (const char *)t; /* everything before t has been verified */
40
+ }
41
+ #endif
42
+ while (p < e) { /* byte-wise scan of the remaining bytes */
43
+ if (!ISASCII(*p))
44
+ return p;
45
+ p++;
46
+ }
47
+ return NULL;
48
+ }
49
+ /* Coerces a replacement to String and checks it is well-formed and usable with enc; raises otherwise, else returns the String. */
50
+ static VALUE
51
+ str_compat_and_valid(VALUE str, rb_encoding *enc)
52
+ {
53
+     int coderange;
54
+     str = StringValue(str);
55
+     coderange = rb_enc_str_coderange(str);
56
+     switch (coderange) {
57
+       case ENC_CODERANGE_BROKEN:
58
+         rb_raise(rb_eArgError, "replacement must be valid byte sequence '%+"PRIsVALUE"'", str);
59
+         break;
60
+       case ENC_CODERANGE_7BIT: /* ASCII-only text only requires an ASCII-compatible target */
61
+         if (!rb_enc_asciicompat(enc)) {
62
+             rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
63
+                      rb_enc_name(enc), rb_enc_name(STR_ENC_GET(str)));
64
+         }
65
+         break;
66
+       default: /* ENC_CODERANGE_VALID: multibyte text must carry exactly the target encoding */
67
+         if (enc != STR_ENC_GET(str)) {
68
+             rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
69
+                      rb_enc_name(enc), rb_enc_name(STR_ENC_GET(str)));
70
+         }
71
+         break;
72
+     }
73
+     return str;
74
+ }
75
+
76
+ /** Shared implementation behind String#scrub and String#scrub!.
77
+  * @param argc,argv optional replacement String in argv[0]; when a block is given it takes precedence and is yielded each invalid byte run
78
+  * @return a new string if str needed repair; Qnil if str is already valid or its encoding is a dummy encoding
79
+  */
80
+ static VALUE
81
+ str_scrub0(int argc, VALUE *argv, VALUE str)
82
+ {
83
+     int cr = ENC_CODERANGE(str);
84
+     rb_encoding *enc;
85
+     int encidx;
86
+     VALUE repl;
87
+
88
+     if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
89
+         return Qnil; /* cached code range already says the string is well-formed */
90
+
91
+     enc = STR_ENC_GET(str);
92
+     rb_scan_args(argc, argv, "01", &repl);
93
+     if (argc != 0) {
94
+         repl = str_compat_and_valid(repl, enc); /* raises unless repl is a valid, compatible String */
95
+     }
96
+
97
+     if (rb_enc_dummy_p(enc)) {
98
+         return Qnil;
99
+     }
100
+     encidx = rb_enc_to_index(enc);
101
+
102
+ #define DEFAULT_REPLACE_CHAR(str) do { \
103
+     static const char replace[sizeof(str)-1] = str; \
104
+     rep = replace; replen = (int)sizeof(replace); \
105
+ } while (0)
106
+
107
+     if (rb_enc_asciicompat(enc)) {
108
+         const char *p = RSTRING_PTR(str);
109
+         const char *e = RSTRING_END(str);
110
+         const char *p1 = p; /* start of the valid bytes not yet copied into buf */
111
+         const char *rep;
112
+         long replen;
113
+         int rep7bit_p;
114
+         VALUE buf = Qnil; /* allocated lazily, on the first invalid sequence */
115
+         if (rb_block_given_p()) {
116
+             rep = NULL; /* NULL rep means: ask the block for every replacement */
117
+             replen = 0;
118
+             rep7bit_p = FALSE;
119
+         }
120
+         else if (!NIL_P(repl)) {
121
+             rep = RSTRING_PTR(repl);
122
+             replen = RSTRING_LEN(repl);
123
+             rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
124
+         }
125
+         else if (encidx == rb_utf8_encindex()) {
126
+             DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD"); /* U+FFFD encoded as UTF-8 */
127
+             rep7bit_p = FALSE;
128
+         }
129
+         else {
130
+             DEFAULT_REPLACE_CHAR("?");
131
+             rep7bit_p = TRUE;
132
+         }
133
+         cr = ENC_CODERANGE_7BIT; /* upgraded to VALID once any multibyte content is emitted */
134
+
135
+         p = search_nonascii(p, e);
136
+         if (!p) {
137
+             p = e;
138
+         }
139
+         while (p < e) {
140
+             int ret = rb_enc_precise_mbclen(p, e, enc);
141
+             if (MBCLEN_NEEDMORE_P(ret)) {
142
+                 break; /* truncated character at the end; handled after the loop */
143
+             }
144
+             else if (MBCLEN_CHARFOUND_P(ret)) {
145
+                 cr = ENC_CODERANGE_VALID;
146
+                 p += MBCLEN_CHARFOUND_LEN(ret);
147
+             }
148
+             else if (MBCLEN_INVALID_P(ret)) {
149
+                 /*
150
+                  * p1~p: valid ascii/multibyte chars
151
+                  * p ~e: invalid bytes + unknown bytes
152
+                  */
153
+                 long clen = rb_enc_mbmaxlen(enc);
154
+                 if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
155
+                 if (p > p1) {
156
+                     rb_str_buf_cat(buf, p1, p - p1);
157
+                 }
158
+
159
+                 if (e - p < clen) clen = e - p;
160
+                 if (clen <= 2) {
161
+                     clen = 1;
162
+                 }
163
+                 else {
164
+                     const char *q = p;
165
+                     clen--;
166
+                     for (; clen > 1; clen--) { /* shrink clen to the longest prefix that is merely incomplete */
167
+                         ret = rb_enc_precise_mbclen(q, q + clen, enc);
168
+                         if (MBCLEN_NEEDMORE_P(ret)) break;
169
+                         if (MBCLEN_INVALID_P(ret)) continue;
170
+                         UNREACHABLE;
171
+                     }
172
+                 }
173
+                 if (rep) {
174
+                     rb_str_buf_cat(buf, rep, replen);
175
+                     if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
176
+                 }
177
+                 else {
178
+                     repl = rb_yield(rb_enc_str_new(p, clen, enc)); /* yield the invalid bytes at p; p1 is the valid prefix copied above */
179
+                     repl = str_compat_and_valid(repl, enc);
180
+                     rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
181
+                     if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
182
+                         cr = ENC_CODERANGE_VALID;
183
+                 }
184
+                 p += clen;
185
+                 p1 = p;
186
+                 p = search_nonascii(p, e);
187
+                 if (!p) {
188
+                     p = e;
189
+                     break;
190
+                 }
191
+             }
192
+             else {
193
+                 UNREACHABLE;
194
+             }
195
+         }
196
+         if (NIL_P(buf)) {
197
+             if (p == e) { /* nothing was invalid: remember the computed code range on str */
198
+                 ENC_CODERANGE_SET(str, cr);
199
+                 return Qnil;
200
+             }
201
+             buf = rb_str_buf_new(RSTRING_LEN(str));
202
+         }
203
+         if (p1 < p) {
204
+             rb_str_buf_cat(buf, p1, p - p1);
205
+         }
206
+         if (p < e) { /* the loop stopped on a truncated trailing character */
207
+             if (rep) {
208
+                 rb_str_buf_cat(buf, rep, replen);
209
+                 if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
210
+             }
211
+             else {
212
+                 repl = rb_yield(rb_enc_str_new(p, e-p, enc));
213
+                 repl = str_compat_and_valid(repl, enc);
214
+                 rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
215
+                 if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
216
+                     cr = ENC_CODERANGE_VALID;
217
+             }
218
+         }
219
+         ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
220
+         return buf;
221
+     }
222
+     else {
223
+         /* ASCII incompatible */
224
+         const char *p = RSTRING_PTR(str);
225
+         const char *e = RSTRING_END(str);
226
+         const char *p1 = p; /* start of the valid bytes not yet copied into buf */
227
+         VALUE buf = Qnil;
228
+         const char *rep;
229
+         long replen;
230
+         long mbminlen = rb_enc_mbminlen(enc);
231
+         if (rb_block_given_p()) { rep = NULL; replen = 0; } /* a block takes precedence, as in the ASCII-compatible branch */
232
+         else if (!NIL_P(repl)) {
233
+             rep = RSTRING_PTR(repl); replen = RSTRING_LEN(repl);
234
+         }
235
+         else if (!strcasecmp(rb_enc_name(enc), "UTF-16BE")) {
236
+             DEFAULT_REPLACE_CHAR("\xFF\xFD");
237
+         }
238
+         else if (!strcasecmp(rb_enc_name(enc), "UTF-16LE")) {
239
+             DEFAULT_REPLACE_CHAR("\xFD\xFF");
240
+         }
241
+         else if (!strcasecmp(rb_enc_name(enc), "UTF-32BE")) {
242
+             DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
243
+         }
244
+         else if (!strcasecmp(rb_enc_name(enc), "UTF-32LE")) {
245
+             DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
246
+         }
247
+         else {
248
+             DEFAULT_REPLACE_CHAR("?");
249
+         }
250
+
251
+         while (p < e) {
252
+             int ret = rb_enc_precise_mbclen(p, e, enc);
253
+             if (MBCLEN_NEEDMORE_P(ret)) {
254
+                 break; /* truncated character at the end; handled after the loop */
255
+             }
256
+             else if (MBCLEN_CHARFOUND_P(ret)) {
257
+                 p += MBCLEN_CHARFOUND_LEN(ret);
258
+             }
259
+             else if (MBCLEN_INVALID_P(ret)) {
260
+                 const char *q = p;
261
+                 long clen = rb_enc_mbmaxlen(enc);
262
+                 if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
263
+                 if (p > p1) rb_str_buf_cat(buf, p1, p - p1);
264
+
265
+                 if (e - p < clen) clen = e - p;
266
+                 if (clen <= mbminlen * 2) {
267
+                     clen = mbminlen;
268
+                 }
269
+                 else {
270
+                     clen -= mbminlen;
271
+                     for (; clen > mbminlen; clen-=mbminlen) { /* shrink in code-unit steps */
272
+                         ret = rb_enc_precise_mbclen(q, q + clen, enc);
273
+                         if (MBCLEN_NEEDMORE_P(ret)) break;
274
+                         if (MBCLEN_INVALID_P(ret)) continue;
275
+                         UNREACHABLE;
276
+                     }
277
+                 }
278
+                 if (rep) {
279
+                     rb_str_buf_cat(buf, rep, replen);
280
+                 }
281
+                 else {
282
+                     repl = rb_yield(rb_enc_str_new(p, clen, enc)); /* only the clen invalid bytes, not everything up to e */
283
+                     repl = str_compat_and_valid(repl, enc);
284
+                     rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
285
+                 }
286
+                 p += clen;
287
+                 p1 = p;
288
+             }
289
+             else {
290
+                 UNREACHABLE;
291
+             }
292
+         }
293
+         if (NIL_P(buf)) {
294
+             if (p == e) { /* nothing was invalid: mark str as valid and report "no change" */
295
+                 ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
296
+                 return Qnil;
297
+             }
298
+             buf = rb_str_buf_new(RSTRING_LEN(str));
299
+         }
300
+         if (p1 < p) {
301
+             rb_str_buf_cat(buf, p1, p - p1);
302
+         }
303
+         if (p < e) { /* the loop stopped on a truncated trailing character */
304
+             if (rep) {
305
+                 rb_str_buf_cat(buf, rep, replen);
306
+             }
307
+             else {
308
+                 repl = rb_yield(rb_enc_str_new(p, e-p, enc));
309
+                 repl = str_compat_and_valid(repl, enc);
310
+                 rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
311
+             }
312
+         }
313
+         ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), ENC_CODERANGE_VALID);
314
+         return buf;
315
+     }
316
+ }
317
+
318
+ /*
319
+  * call-seq:
320
+  *   str.scrub -> new_str
321
+  *   str.scrub(repl) -> new_str
322
+  *   str.scrub{|bytes|} -> new_str
323
+  *
324
+  * Returns a new string in which every invalid byte sequence of str is replaced by repl, or by the
325
+  * block's return value when a block is given. A str that needs no repair is returned as a duplicate.
326
+  *
327
+  *   "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
328
+  *   "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
329
+  *   "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
330
+  */
331
+ VALUE
332
+ rb_str_scrub(int argc, VALUE *argv, VALUE str)
333
+ {
334
+     VALUE scrubbed = str_scrub0(argc, argv, str);
335
+     if (!NIL_P(scrubbed)) return scrubbed;
336
+     return rb_str_dup(str); /* Qnil from str_scrub0 means nothing had to change */
337
+ }
338
+
339
+ /*
340
+  * call-seq:
341
+  *   str.scrub! -> str
342
+  *   str.scrub!(repl) -> str
343
+  *   str.scrub!{|bytes|} -> str
344
+  *
345
+  * In-place form of scrub: when str contains invalid byte sequences its contents are replaced by the
346
+  * repaired string (using repl, or the block's return value). The receiver itself is always returned.
347
+  *
348
+  *   "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
349
+  *   "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
350
+  *   "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
351
+  */
352
+ static VALUE
353
+ str_scrub_bang(int argc, VALUE *argv, VALUE str)
354
+ {
355
+     VALUE scrubbed = str_scrub0(argc, argv, str);
356
+     if (NIL_P(scrubbed)) return str; /* nothing to repair */
357
+     rb_str_replace(str, scrubbed);
358
+     return str;
359
+ }
360
+ /* Init_scrub: defines String#scrub and String#scrub! on rb_cString; -1 selects the (argc, argv, self) calling form both functions use. */
361
+ void
362
+ Init_scrub(void)
363
+ {
364
+ rb_define_method(rb_cString, "scrub", rb_str_scrub, -1);
365
+ rb_define_method(rb_cString, "scrub!", str_scrub_bang, -1);
366
+ }
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__) # NOTE(review): lib is not referenced again in this gemspec
3
+
4
+ Gem::Specification.new do |spec| # gem metadata for string-scrub
5
+ spec.name = "string-scrub"
6
+ spec.version = "0.0.1"
7
+ spec.authors = ["SHIBATA Hiroshi"]
8
+ spec.email = ["shibata.hiroshi@gmail.com"]
9
+ spec.summary = %q{String#scrub for Ruby 2.0.0}
10
+ spec.description = %q{String#scrub for Ruby 2.0.0}
11
+ spec.homepage = "https://github.com/hsbt/string-scrub"
12
+ spec.license = "MIT"
13
+
14
+ spec.files = `git ls-files`.split($/) # package every file tracked by git
15
+ spec.extensions = ["ext/string/extconf.rb"] # extconf script for the C extension
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.required_ruby_version = '>= 2.0.0'
21
+
22
+ spec.add_development_dependency "bundler"
23
+ spec.add_development_dependency "rake"
24
+ end
@@ -0,0 +1,71 @@
1
+ # coding: US-ASCII
2
+ require 'test/unit'
3
+ require_relative '../ext/string/scrub'
4
+
5
+ class TestScrub < Test::Unit::TestCase # exercises String#scrub and String#scrub! from the C extension
6
+ module AESU # helpers that return a copy of str tagged with a specific encoding
7
+ def ua(str) str.dup.force_encoding("US-ASCII") end
8
+ def a(str) str.dup.force_encoding("ASCII-8BIT") end
9
+ def e(str) str.dup.force_encoding("EUC-JP") end
10
+ def s(str) str.dup.force_encoding("Windows-31J") end
11
+ def u(str) str.dup.force_encoding("UTF-8") end
12
+ end
13
+ include AESU
14
+
15
+ def test_scrub
16
+ str = "\u3042\u3044"
17
+ assert_not_same(str, str.scrub) # a valid string still yields a distinct object
18
+ str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
19
+ assert_not_same(str, str.scrub)
20
+
21
+ assert_equal("\uFFFD\uFFFD\uFFFD", u("\x80\x80\x80").scrub)
22
+ assert_equal("\uFFFDA", u("\xF4\x80\x80A").scrub)
23
+
24
+ # examples in Unicode 6.1.0 D93b
25
+ assert_equal("\x41\uFFFD\uFFFD\x41\uFFFD\x41",
26
+ u("\x41\xC0\xAF\x41\xF4\x80\x80\x41").scrub)
27
+ assert_equal("\x41\uFFFD\uFFFD\uFFFD\x41",
28
+ u("\x41\xE0\x9F\x80\x41").scrub)
29
+ assert_equal("\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064",
30
+ u("\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
31
+ assert_equal("abcdefghijklmnopqrstuvwxyz\u0061\uFFFD\uFFFD\uFFFD\u0062\uFFFD\u0063\uFFFD\uFFFD\u0064",
32
+ u("abcdefghijklmnopqrstuvwxyz\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64").scrub)
33
+
34
+ assert_equal("\u3042\u3013", u("\xE3\x81\x82\xE3\x81").scrub("\u3013")) # explicit replacement string
35
+ assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub(e("\xA4\xA2")) }
36
+ assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub(1) }
37
+ assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub(u("\x81")) }
38
+ assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub(e("\xA2\xAE")))
39
+
40
+ assert_equal("\u3042<e381>", u("\xE3\x81\x82\xE3\x81").scrub{|x|'<'+x.unpack('H*')[0]+'>'}) # block form; NOTE(review): every block case here has its invalid bytes at the very end of the string
41
+ assert_raise(Encoding::CompatibilityError){ u("\xE3\x81\x82\xE3\x81").scrub{e("\xA4\xA2")} }
42
+ assert_raise(TypeError){ u("\xE3\x81\x82\xE3\x81").scrub{1} }
43
+ assert_raise(ArgumentError){ u("\xE3\x81\x82\xE3\x81\x82\xE3\x81").scrub{u("\x81")} }
44
+ assert_equal(e("\xA4\xA2\xA2\xAE"), e("\xA4\xA2\xA4").scrub{e("\xA2\xAE")})
45
+
46
+ assert_equal("\uFFFD\u3042".encode("UTF-16BE"),
47
+ "\xD8\x00\x30\x42".force_encoding(Encoding::UTF_16BE).
48
+ scrub)
49
+ assert_equal("\uFFFD\u3042".encode("UTF-16LE"),
50
+ "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE).
51
+ scrub)
52
+ assert_equal("\uFFFD".encode("UTF-32BE"),
53
+ "\xff".force_encoding(Encoding::UTF_32BE).
54
+ scrub)
55
+ assert_equal("\uFFFD".encode("UTF-32LE"),
56
+ "\xff".force_encoding(Encoding::UTF_32LE).
57
+ scrub)
58
+ end
59
+
60
+ def test_scrub_bang
61
+ str = "\u3042\u3044"
62
+ assert_same(str, str.scrub!) # scrub! returns the receiver even when nothing changes
63
+ str.force_encoding(Encoding::ISO_2022_JP) # dummy encoding
64
+ assert_same(str, str.scrub!)
65
+
66
+ str = u("\x80\x80\x80")
67
+ str.scrub!
68
+ assert_same(str, str.scrub!) # second call on the already-repaired string
69
+ assert_equal("\uFFFD\uFFFD\uFFFD", str)
70
+ end
71
+ end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: string-scrub
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - SHIBATA Hiroshi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: String#scrub for Ruby 2.0.0
42
+ email:
43
+ - shibata.hiroshi@gmail.com
44
+ executables: []
45
+ extensions:
46
+ - ext/string/extconf.rb
47
+ extra_rdoc_files: []
48
+ files:
49
+ - ".gitignore"
50
+ - ".rspec"
51
+ - ".travis.yml"
52
+ - Gemfile
53
+ - LICENSE.txt
54
+ - README.md
55
+ - Rakefile
56
+ - ext/string/extconf.rb
57
+ - ext/string/scrub.c
58
+ - string-scrub.gemspec
59
+ - test/test_scrub.rb
60
+ homepage: https://github.com/hsbt/string-scrub
61
+ licenses:
62
+ - MIT
63
+ metadata: {}
64
+ post_install_message:
65
+ rdoc_options: []
66
+ require_paths:
67
+ - lib
68
+ required_ruby_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 2.0.0
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ requirements: []
79
+ rubyforge_project:
80
+ rubygems_version: 2.1.10
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: String#scrub for Ruby 2.0.0
84
+ test_files:
85
+ - test/test_scrub.rb
86
+ has_rdoc: