rejectu 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +15 -8
- data/ext/rejectu/rejectu.c +35 -15
- data/rejectu.gemspec +2 -1
- data/test/test_rejectu.rb +9 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1db9ef4eb58f201a481308327961182d1226609
|
4
|
+
data.tar.gz: 3ff4ccab421b53c29bd6ed08219c36218ce761e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e1f64f8d17d061fb0e521bb63f4923896e00eed84375156dd4b3b3164b3671dce5021112fc08887ddd842381db6814a505cbea131a2d8e090893664294de2a0
|
7
|
+
data.tar.gz: 4a04a170ebb63932dae2c863ebc00777aaccab8b7ec5583aa44065d1f4773208c813afe2782e48dbe2ea79cf9764708951d0e8c181cea12d2f271f90d4c276dd
|
data/Gemfile.lock
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rejectu (0.0.
|
4
|
+
rejectu (0.0.2)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
power_assert (0.2.4)
|
9
10
|
rake (10.3.2)
|
10
11
|
rake-compiler (0.9.2)
|
11
12
|
rake
|
13
|
+
test-unit (3.1.4)
|
14
|
+
power_assert
|
12
15
|
|
13
16
|
PLATFORMS
|
14
17
|
ruby
|
@@ -16,3 +19,4 @@ PLATFORMS
|
|
16
19
|
DEPENDENCIES
|
17
20
|
rake-compiler (~> 0.9)
|
18
21
|
rejectu!
|
22
|
+
test-unit (~> 3.1)
|
data/README.md
CHANGED
@@ -6,18 +6,25 @@ A simple Ruby extension that verifies that a UTF-8 string does not contain any c
|
|
6
6
|
|
7
7
|
- C extension that uses SSE2 for webscale
|
8
8
|
|
9
|
+
### Installation
|
10
|
+
|
11
|
+
`gem install rejectu`
|
12
|
+
|
13
|
+
or if you're using bundler add the following to your Gemfile
|
14
|
+
|
15
|
+
`gem "rejectu"`
|
16
|
+
|
9
17
|
### Usage
|
10
18
|
|
11
19
|
```ruby
|
12
20
|
require 'rejectu/rejectu'
|
13
21
|
|
14
|
-
Rejectu.valid?
|
15
|
-
Rejectu.valid?
|
16
|
-
Rejectu.valid?
|
22
|
+
Rejectu.valid?("happy! \xf2\xa4\xb7\xa4") # false
|
23
|
+
Rejectu.valid?("really happy!") # true
|
24
|
+
Rejectu.valid?("this should be good too \xe2\x84\xa2") # true
|
17
25
|
|
18
|
-
Rejectu.scrub
|
19
|
-
```
|
20
|
-
|
21
|
-
### Limitations
|
26
|
+
Rejectu.scrub("happy! \xf2\xa4\xb7\xa4") # => "happy! ?"
|
22
27
|
|
23
|
-
|
28
|
+
# using a custom replacement character
|
29
|
+
Rejectu.scrub("happy! \xf2\xa4\xb7\xa4", ".") # => "happy! ."
|
30
|
+
```
|
data/ext/rejectu/rejectu.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
static VALUE mRejectu = Qnil;
|
8
8
|
static VALUE idEncoding, idTo_s;
|
9
|
+
static VALUE defaultToken = Qnil;
|
9
10
|
|
10
11
|
#ifdef __SSE2__
|
11
12
|
static inline int
|
@@ -125,11 +126,12 @@ is_valid(VALUE self, VALUE str)
|
|
125
126
|
}
|
126
127
|
|
127
128
|
static VALUE
|
128
|
-
do_scrub(VALUE str)
|
129
|
+
do_scrub(VALUE str, VALUE rplToken)
|
129
130
|
{
|
130
131
|
VALUE out_str;
|
131
132
|
unsigned char *p, *end, *out_start, *out;
|
132
133
|
long len, out_len;
|
134
|
+
char token = StringValueCStr(rplToken)[0];
|
133
135
|
|
134
136
|
validate_utf8_input(str);
|
135
137
|
|
@@ -151,7 +153,7 @@ do_scrub(VALUE str)
|
|
151
153
|
} else {
|
152
154
|
p += 4;
|
153
155
|
}
|
154
|
-
*out++ =
|
156
|
+
*out++ = token;
|
155
157
|
} else {
|
156
158
|
*out++ = *p++;
|
157
159
|
}
|
@@ -165,34 +167,52 @@ do_scrub(VALUE str)
|
|
165
167
|
}
|
166
168
|
|
167
169
|
static VALUE
|
168
|
-
scrub(VALUE
|
170
|
+
scrub(int argc, VALUE *argv, VALUE self)
|
169
171
|
{
|
170
|
-
|
171
|
-
|
172
|
+
VALUE input, token;
|
173
|
+
rb_scan_args(argc, argv, "11", &input, &token);
|
174
|
+
|
175
|
+
if (is_valid(self, input) == Qtrue) {
|
176
|
+
return input;
|
177
|
+
}
|
178
|
+
|
179
|
+
if (token == Qnil) {
|
180
|
+
token = defaultToken;
|
172
181
|
}
|
173
|
-
|
182
|
+
|
183
|
+
return do_scrub(input, token);
|
174
184
|
}
|
175
185
|
|
176
186
|
static VALUE
|
177
|
-
scrub_bang(VALUE
|
187
|
+
scrub_bang(int argc, VALUE *argv, VALUE self)
|
178
188
|
{
|
179
|
-
VALUE
|
180
|
-
|
181
|
-
|
189
|
+
VALUE input, token;
|
190
|
+
rb_scan_args(argc, argv, "11", &input, &token);
|
191
|
+
|
192
|
+
if (!is_valid(self, input)) {
|
193
|
+
if (token == Qnil) {
|
194
|
+
token = defaultToken;
|
195
|
+
}
|
196
|
+
|
197
|
+
VALUE repl = do_scrub(input, token);
|
198
|
+
if (!NIL_P(repl)) {
|
199
|
+
rb_str_replace(input, repl);
|
200
|
+
}
|
182
201
|
}
|
183
|
-
|
184
|
-
|
185
|
-
return str;
|
202
|
+
|
203
|
+
return input;
|
186
204
|
}
|
187
205
|
|
188
206
|
void
|
189
207
|
Init_rejectu()
|
190
208
|
{
|
191
209
|
mRejectu = rb_define_module("Rejectu");
|
210
|
+
defaultToken = rb_str_new2("?");
|
211
|
+
rb_global_variable(&defaultToken);
|
192
212
|
|
193
213
|
rb_define_singleton_method(mRejectu, "valid?", is_valid, 1);
|
194
|
-
rb_define_singleton_method(mRejectu, "scrub", scrub, 1);
|
195
|
-
rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, 1);
|
214
|
+
rb_define_singleton_method(mRejectu, "scrub", scrub, -1);
|
215
|
+
rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, -1);
|
196
216
|
|
197
217
|
idEncoding = rb_intern("encoding");
|
198
218
|
idTo_s = rb_intern("to_s");
|
data/rejectu.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'rejectu'
|
3
|
-
s.version = '0.0.2'
|
3
|
+
s.version = '0.1.0'
|
4
4
|
s.summary = 'Detects if a UTF-8 string supplementary plane code points'
|
5
5
|
s.description = <<-DOC
|
6
6
|
This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
|
@@ -14,4 +14,5 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.extensions = ['ext/rejectu/extconf.rb']
|
16
16
|
s.add_development_dependency 'rake-compiler', '~> 0.9'
|
17
|
+
s.add_development_dependency 'test-unit', '~> 3.1'
|
17
18
|
end
|
data/test/test_rejectu.rb
CHANGED
@@ -77,10 +77,19 @@ class TestRejectu < Test::Unit::TestCase
|
|
77
77
|
assert_equal "? test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string")
|
78
78
|
end
|
79
79
|
|
80
|
+
def test_scrub_with_custom_token
|
81
|
+
assert_equal ". test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string", ".")
|
82
|
+
end
|
83
|
+
|
80
84
|
def test_scrub!
|
81
85
|
s = "\xf2\xa4\xb7\xa4 test string"
|
82
86
|
assert_equal "? test string", Rejectu.scrub!(s)
|
83
87
|
assert_equal "? test string", s
|
84
88
|
end
|
85
89
|
|
90
|
+
def test_scrub_with_custom_token!
|
91
|
+
s = "\xf2\xa4\xb7\xa4 test string"
|
92
|
+
assert_equal ". test string", Rejectu.scrub!(s, ".")
|
93
|
+
assert_equal ". test string", s
|
94
|
+
end
|
86
95
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rejectu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Francis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.1'
|
27
41
|
description: |2
|
28
42
|
This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
|
29
43
|
plane (code points >= U+10000).
|
@@ -63,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
77
|
version: '0'
|
64
78
|
requirements: []
|
65
79
|
rubyforge_project:
|
66
|
-
rubygems_version: 2.2.
|
80
|
+
rubygems_version: 2.2.3
|
67
81
|
signing_key:
|
68
82
|
specification_version: 4
|
69
83
|
summary: Detects if a UTF-8 string supplementary plane code points
|