rejectu 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26755280c4039506f58fa4ca6f055128a6cd4986
4
- data.tar.gz: 241be0c2c820fc77f611fa66b69dfc842e71f1e1
3
+ metadata.gz: b1db9ef4eb58f201a481308327961182d1226609
4
+ data.tar.gz: 3ff4ccab421b53c29bd6ed08219c36218ce761e0
5
5
  SHA512:
6
- metadata.gz: ff027e3da57a31fa94087f7303d288a61960e93d79d9e09afeb26f58d2b2e93bd5e9549c2d48256fdd82953c9f07c68c9ccd2208cb88057f43fc3ddef5a248c6
7
- data.tar.gz: ca9c37d6035dee4d3258023cd3d4af76967c2bd5dc006aa666862be7f9e28905c649bcefaed6bed955d8ed6c6cf8dc8ca498ed989a45483d7618339944d885d6
6
+ metadata.gz: 4e1f64f8d17d061fb0e521bb63f4923896e00eed84375156dd4b3b3164b3671dce5021112fc08887ddd842381db6814a505cbea131a2d8e090893664294de2a0
7
+ data.tar.gz: 4a04a170ebb63932dae2c863ebc00777aaccab8b7ec5583aa44065d1f4773208c813afe2782e48dbe2ea79cf9764708951d0e8c181cea12d2f271f90d4c276dd
@@ -1,14 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rejectu (0.0.1)
4
+ rejectu (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ power_assert (0.2.4)
9
10
  rake (10.3.2)
10
11
  rake-compiler (0.9.2)
11
12
  rake
13
+ test-unit (3.1.4)
14
+ power_assert
12
15
 
13
16
  PLATFORMS
14
17
  ruby
@@ -16,3 +19,4 @@ PLATFORMS
16
19
  DEPENDENCIES
17
20
  rake-compiler (~> 0.9)
18
21
  rejectu!
22
+ test-unit (~> 3.1)
data/README.md CHANGED
@@ -6,18 +6,25 @@ A simple Ruby extension that verifies that a UTF-8 string does not contain any c
6
6
 
7
7
  - C extension that uses SSE2 for webscale
8
8
 
9
+ ### Installation
10
+
11
+ `gem install rejectu`
12
+
13
+ or if you're using bundler add the following to your Gemfile
14
+
15
+ `gem "rejectu"`
16
+
9
17
  ### Usage
10
18
 
11
19
  ```ruby
12
20
  require 'rejectu/rejectu'
13
21
 
14
- Rejectu.valid? "happy! \xf2\xa4\xb7\xa4" # false
15
- Rejectu.valid? "really happy!" # true
16
- Rejectu.valid? "this should be good too \xe2\x84\xa2" # true
22
+ Rejectu.valid?("happy! \xf2\xa4\xb7\xa4") # false
23
+ Rejectu.valid?("really happy!") # true
24
+ Rejectu.valid?("this should be good too \xe2\x84\xa2") # true
17
25
 
18
- Rejectu.scrub "happy! \xf2\xa4\xb7\xa4" # => "happy! ?"
19
- ```
20
-
21
- ### Limitations
26
+ Rejectu.scrub("happy! \xf2\xa4\xb7\xa4") # => "happy! ?"
22
27
 
23
- - The replacement character for `scrub` currently cannot be configured
28
+ # using a custom replacement character
29
+ Rejectu.scrub("happy! \xf2\xa4\xb7\xa4", ".") # => "happy! ."
30
+ ```
@@ -6,6 +6,7 @@
6
6
 
7
7
  static VALUE mRejectu = Qnil;
8
8
  static VALUE idEncoding, idTo_s;
9
+ static VALUE defaultToken = Qnil;
9
10
 
10
11
  #ifdef __SSE2__
11
12
  static inline int
@@ -125,11 +126,12 @@ is_valid(VALUE self, VALUE str)
125
126
  }
126
127
 
127
128
  static VALUE
128
- do_scrub(VALUE str)
129
+ do_scrub(VALUE str, VALUE rplToken)
129
130
  {
130
131
  VALUE out_str;
131
132
  unsigned char *p, *end, *out_start, *out;
132
133
  long len, out_len;
134
+ char token = StringValueCStr(rplToken)[0];
133
135
 
134
136
  validate_utf8_input(str);
135
137
 
@@ -151,7 +153,7 @@ do_scrub(VALUE str)
151
153
  } else {
152
154
  p += 4;
153
155
  }
154
- *out++ = '?';
156
+ *out++ = token;
155
157
  } else {
156
158
  *out++ = *p++;
157
159
  }
@@ -165,34 +167,52 @@ do_scrub(VALUE str)
165
167
  }
166
168
 
167
169
  static VALUE
168
- scrub(VALUE self, VALUE str)
170
+ scrub(int argc, VALUE *argv, VALUE self)
169
171
  {
170
- if (is_valid(self, str) == Qtrue) {
171
- return rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_utf8_encoding());
172
+ VALUE input, token;
173
+ rb_scan_args(argc, argv, "11", &input, &token);
174
+
175
+ if (is_valid(self, input) == Qtrue) {
176
+ return input;
177
+ }
178
+
179
+ if (token == Qnil) {
180
+ token = defaultToken;
172
181
  }
173
- return do_scrub(str);
182
+
183
+ return do_scrub(input, token);
174
184
  }
175
185
 
176
186
  static VALUE
177
- scrub_bang(VALUE self, VALUE str)
187
+ scrub_bang(int argc, VALUE *argv, VALUE self)
178
188
  {
179
- VALUE repl;
180
- if (is_valid(self, str) == Qtrue) {
181
- return str;
189
+ VALUE input, token;
190
+ rb_scan_args(argc, argv, "11", &input, &token);
191
+
192
+ if (!is_valid(self, input)) {
193
+ if (token == Qnil) {
194
+ token = defaultToken;
195
+ }
196
+
197
+ VALUE repl = do_scrub(input, token);
198
+ if (!NIL_P(repl)) {
199
+ rb_str_replace(input, repl);
200
+ }
182
201
  }
183
- repl = do_scrub(str);
184
- if (!NIL_P(repl)) rb_str_replace(str, repl);
185
- return str;
202
+
203
+ return input;
186
204
  }
187
205
 
188
206
  void
189
207
  Init_rejectu()
190
208
  {
191
209
  mRejectu = rb_define_module("Rejectu");
210
+ defaultToken = rb_str_new2("?");
211
+ rb_global_variable(&defaultToken);
192
212
 
193
213
  rb_define_singleton_method(mRejectu, "valid?", is_valid, 1);
194
- rb_define_singleton_method(mRejectu, "scrub", scrub, 1);
195
- rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, 1);
214
+ rb_define_singleton_method(mRejectu, "scrub", scrub, -1);
215
+ rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, -1);
196
216
 
197
217
  idEncoding = rb_intern("encoding");
198
218
  idTo_s = rb_intern("to_s");
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'rejectu'
3
- s.version = '0.0.2'
3
+ s.version = '0.1.0'
4
4
  s.summary = 'Detects if a UTF-8 string supplementary plane code points'
5
5
  s.description = <<-DOC
6
6
  This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
@@ -14,4 +14,5 @@ Gem::Specification.new do |s|
14
14
  s.files = `git ls-files`.split("\n")
15
15
  s.extensions = ['ext/rejectu/extconf.rb']
16
16
  s.add_development_dependency 'rake-compiler', '~> 0.9'
17
+ s.add_development_dependency 'test-unit', '~> 3.1'
17
18
  end
@@ -77,10 +77,19 @@ class TestRejectu < Test::Unit::TestCase
77
77
  assert_equal "? test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string")
78
78
  end
79
79
 
80
+ def test_scrub_with_custom_token
81
+ assert_equal ". test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string", ".")
82
+ end
83
+
80
84
  def test_scrub!
81
85
  s = "\xf2\xa4\xb7\xa4 test string"
82
86
  assert_equal "? test string", Rejectu.scrub!(s)
83
87
  assert_equal "? test string", s
84
88
  end
85
89
 
90
+ def test_scrub_with_custom_token!
91
+ s = "\xf2\xa4\xb7\xa4 test string"
92
+ assert_equal ". test string", Rejectu.scrub!(s, ".")
93
+ assert_equal ". test string", s
94
+ end
86
95
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rejectu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Francis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-05 00:00:00.000000000 Z
11
+ date: 2015-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: test-unit
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
27
41
  description: |2
28
42
  This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
29
43
  plane (code points >= U+10000).
@@ -63,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
77
  version: '0'
64
78
  requirements: []
65
79
  rubyforge_project:
66
- rubygems_version: 2.2.2
80
+ rubygems_version: 2.2.3
67
81
  signing_key:
68
82
  specification_version: 4
69
83
  summary: Detects if a UTF-8 string supplementary plane code points