rejectu 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +15 -8
- data/ext/rejectu/rejectu.c +35 -15
- data/rejectu.gemspec +2 -1
- data/test/test_rejectu.rb +9 -0
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b1db9ef4eb58f201a481308327961182d1226609
|
4
|
+
data.tar.gz: 3ff4ccab421b53c29bd6ed08219c36218ce761e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e1f64f8d17d061fb0e521bb63f4923896e00eed84375156dd4b3b3164b3671dce5021112fc08887ddd842381db6814a505cbea131a2d8e090893664294de2a0
|
7
|
+
data.tar.gz: 4a04a170ebb63932dae2c863ebc00777aaccab8b7ec5583aa44065d1f4773208c813afe2782e48dbe2ea79cf9764708951d0e8c181cea12d2f271f90d4c276dd
|
data/Gemfile.lock
CHANGED
@@ -1,14 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rejectu (0.0.
|
4
|
+
rejectu (0.0.2)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
power_assert (0.2.4)
|
9
10
|
rake (10.3.2)
|
10
11
|
rake-compiler (0.9.2)
|
11
12
|
rake
|
13
|
+
test-unit (3.1.4)
|
14
|
+
power_assert
|
12
15
|
|
13
16
|
PLATFORMS
|
14
17
|
ruby
|
@@ -16,3 +19,4 @@ PLATFORMS
|
|
16
19
|
DEPENDENCIES
|
17
20
|
rake-compiler (~> 0.9)
|
18
21
|
rejectu!
|
22
|
+
test-unit (~> 3.1)
|
data/README.md
CHANGED
@@ -6,18 +6,25 @@ A simple Ruby extension that verifies that a UTF-8 string does not contain any c
|
|
6
6
|
|
7
7
|
- C extension that uses SSE2 for webscale
|
8
8
|
|
9
|
+
### Installation
|
10
|
+
|
11
|
+
`gem install rejectu`
|
12
|
+
|
13
|
+
or if you're using bundler add the following to your Gemfile
|
14
|
+
|
15
|
+
`gem "rejectu"`
|
16
|
+
|
9
17
|
### Usage
|
10
18
|
|
11
19
|
```ruby
|
12
20
|
require 'rejectu/rejectu'
|
13
21
|
|
14
|
-
Rejectu.valid?
|
15
|
-
Rejectu.valid?
|
16
|
-
Rejectu.valid?
|
22
|
+
Rejectu.valid?("happy! \xf2\xa4\xb7\xa4") # false
|
23
|
+
Rejectu.valid?("really happy!") # true
|
24
|
+
Rejectu.valid?("this should be good too \xe2\x84\xa2") # true
|
17
25
|
|
18
|
-
Rejectu.scrub
|
19
|
-
```
|
20
|
-
|
21
|
-
### Limitations
|
26
|
+
Rejectu.scrub("happy! \xf2\xa4\xb7\xa4") # => "happy! ?"
|
22
27
|
|
23
|
-
|
28
|
+
# using a custom replacement character
|
29
|
+
Rejectu.scrub("happy! \xf2\xa4\xb7\xa4", ".") # => "happy! ."
|
30
|
+
```
|
data/ext/rejectu/rejectu.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
|
7
7
|
static VALUE mRejectu = Qnil;
|
8
8
|
static VALUE idEncoding, idTo_s;
|
9
|
+
static VALUE defaultToken = Qnil;
|
9
10
|
|
10
11
|
#ifdef __SSE2__
|
11
12
|
static inline int
|
@@ -125,11 +126,12 @@ is_valid(VALUE self, VALUE str)
|
|
125
126
|
}
|
126
127
|
|
127
128
|
static VALUE
|
128
|
-
do_scrub(VALUE str)
|
129
|
+
do_scrub(VALUE str, VALUE rplToken)
|
129
130
|
{
|
130
131
|
VALUE out_str;
|
131
132
|
unsigned char *p, *end, *out_start, *out;
|
132
133
|
long len, out_len;
|
134
|
+
char token = StringValueCStr(rplToken)[0];
|
133
135
|
|
134
136
|
validate_utf8_input(str);
|
135
137
|
|
@@ -151,7 +153,7 @@ do_scrub(VALUE str)
|
|
151
153
|
} else {
|
152
154
|
p += 4;
|
153
155
|
}
|
154
|
-
*out++ =
|
156
|
+
*out++ = token;
|
155
157
|
} else {
|
156
158
|
*out++ = *p++;
|
157
159
|
}
|
@@ -165,34 +167,52 @@ do_scrub(VALUE str)
|
|
165
167
|
}
|
166
168
|
|
167
169
|
static VALUE
|
168
|
-
scrub(VALUE
|
170
|
+
scrub(int argc, VALUE *argv, VALUE self)
|
169
171
|
{
|
170
|
-
|
171
|
-
|
172
|
+
VALUE input, token;
|
173
|
+
rb_scan_args(argc, argv, "11", &input, &token);
|
174
|
+
|
175
|
+
if (is_valid(self, input) == Qtrue) {
|
176
|
+
return input;
|
177
|
+
}
|
178
|
+
|
179
|
+
if (token == Qnil) {
|
180
|
+
token = defaultToken;
|
172
181
|
}
|
173
|
-
|
182
|
+
|
183
|
+
return do_scrub(input, token);
|
174
184
|
}
|
175
185
|
|
176
186
|
static VALUE
|
177
|
-
scrub_bang(VALUE
|
187
|
+
scrub_bang(int argc, VALUE *argv, VALUE self)
|
178
188
|
{
|
179
|
-
VALUE
|
180
|
-
|
181
|
-
|
189
|
+
VALUE input, token;
|
190
|
+
rb_scan_args(argc, argv, "11", &input, &token);
|
191
|
+
|
192
|
+
if (!is_valid(self, input)) {
|
193
|
+
if (token == Qnil) {
|
194
|
+
token = defaultToken;
|
195
|
+
}
|
196
|
+
|
197
|
+
VALUE repl = do_scrub(input, token);
|
198
|
+
if (!NIL_P(repl)) {
|
199
|
+
rb_str_replace(input, repl);
|
200
|
+
}
|
182
201
|
}
|
183
|
-
|
184
|
-
|
185
|
-
return str;
|
202
|
+
|
203
|
+
return input;
|
186
204
|
}
|
187
205
|
|
188
206
|
void
|
189
207
|
Init_rejectu()
|
190
208
|
{
|
191
209
|
mRejectu = rb_define_module("Rejectu");
|
210
|
+
defaultToken = rb_str_new2("?");
|
211
|
+
rb_global_variable(&defaultToken);
|
192
212
|
|
193
213
|
rb_define_singleton_method(mRejectu, "valid?", is_valid, 1);
|
194
|
-
rb_define_singleton_method(mRejectu, "scrub", scrub, 1);
|
195
|
-
rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, 1);
|
214
|
+
rb_define_singleton_method(mRejectu, "scrub", scrub, -1);
|
215
|
+
rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, -1);
|
196
216
|
|
197
217
|
idEncoding = rb_intern("encoding");
|
198
218
|
idTo_s = rb_intern("to_s");
|
data/rejectu.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'rejectu'
|
3
|
-
s.version = '0.0
|
3
|
+
s.version = '0.1.0'
|
4
4
|
s.summary = 'Detects if a UTF-8 string supplementary plane code points'
|
5
5
|
s.description = <<-DOC
|
6
6
|
This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
|
@@ -14,4 +14,5 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.extensions = ['ext/rejectu/extconf.rb']
|
16
16
|
s.add_development_dependency 'rake-compiler', '~> 0.9'
|
17
|
+
s.add_development_dependency 'test-unit', '~> 3.1'
|
17
18
|
end
|
data/test/test_rejectu.rb
CHANGED
@@ -77,10 +77,19 @@ class TestRejectu < Test::Unit::TestCase
|
|
77
77
|
assert_equal "? test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string")
|
78
78
|
end
|
79
79
|
|
80
|
+
def test_scrub_with_custom_token
|
81
|
+
assert_equal ". test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string", ".")
|
82
|
+
end
|
83
|
+
|
80
84
|
def test_scrub!
|
81
85
|
s = "\xf2\xa4\xb7\xa4 test string"
|
82
86
|
assert_equal "? test string", Rejectu.scrub!(s)
|
83
87
|
assert_equal "? test string", s
|
84
88
|
end
|
85
89
|
|
90
|
+
def test_scrub_with_custom_token!
|
91
|
+
s = "\xf2\xa4\xb7\xa4 test string"
|
92
|
+
assert_equal ". test string", Rejectu.scrub!(s, ".")
|
93
|
+
assert_equal ". test string", s
|
94
|
+
end
|
86
95
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rejectu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Francis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.1'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.1'
|
27
41
|
description: |2
|
28
42
|
This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
|
29
43
|
plane (code points >= U+10000).
|
@@ -63,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
77
|
version: '0'
|
64
78
|
requirements: []
|
65
79
|
rubyforge_project:
|
66
|
-
rubygems_version: 2.2.
|
80
|
+
rubygems_version: 2.2.3
|
67
81
|
signing_key:
|
68
82
|
specification_version: 4
|
69
83
|
summary: Detects if a UTF-8 string supplementary plane code points
|