rejectu 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26755280c4039506f58fa4ca6f055128a6cd4986
4
- data.tar.gz: 241be0c2c820fc77f611fa66b69dfc842e71f1e1
3
+ metadata.gz: b1db9ef4eb58f201a481308327961182d1226609
4
+ data.tar.gz: 3ff4ccab421b53c29bd6ed08219c36218ce761e0
5
5
  SHA512:
6
- metadata.gz: ff027e3da57a31fa94087f7303d288a61960e93d79d9e09afeb26f58d2b2e93bd5e9549c2d48256fdd82953c9f07c68c9ccd2208cb88057f43fc3ddef5a248c6
7
- data.tar.gz: ca9c37d6035dee4d3258023cd3d4af76967c2bd5dc006aa666862be7f9e28905c649bcefaed6bed955d8ed6c6cf8dc8ca498ed989a45483d7618339944d885d6
6
+ metadata.gz: 4e1f64f8d17d061fb0e521bb63f4923896e00eed84375156dd4b3b3164b3671dce5021112fc08887ddd842381db6814a505cbea131a2d8e090893664294de2a0
7
+ data.tar.gz: 4a04a170ebb63932dae2c863ebc00777aaccab8b7ec5583aa44065d1f4773208c813afe2782e48dbe2ea79cf9764708951d0e8c181cea12d2f271f90d4c276dd
@@ -1,14 +1,17 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- rejectu (0.0.1)
4
+ rejectu (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ power_assert (0.2.4)
9
10
  rake (10.3.2)
10
11
  rake-compiler (0.9.2)
11
12
  rake
13
+ test-unit (3.1.4)
14
+ power_assert
12
15
 
13
16
  PLATFORMS
14
17
  ruby
@@ -16,3 +19,4 @@ PLATFORMS
16
19
  DEPENDENCIES
17
20
  rake-compiler (~> 0.9)
18
21
  rejectu!
22
+ test-unit (~> 3.1)
data/README.md CHANGED
@@ -6,18 +6,25 @@ A simple Ruby extension that verifies that a UTF-8 string does not contain any c
6
6
 
7
7
  - C extension that uses SSE2 for webscale
8
8
 
9
+ ### Installation
10
+
11
+ `gem install rejectu`
12
+
13
+ or if you're using bundler add the following to your Gemfile
14
+
15
+ `gem "rejectu"`
16
+
9
17
  ### Usage
10
18
 
11
19
  ```ruby
12
20
  require 'rejectu/rejectu'
13
21
 
14
- Rejectu.valid? "happy! \xf2\xa4\xb7\xa4" # false
15
- Rejectu.valid? "really happy!" # true
16
- Rejectu.valid? "this should be good too \xe2\x84\xa2" # true
22
+ Rejectu.valid?("happy! \xf2\xa4\xb7\xa4") # false
23
+ Rejectu.valid?("really happy!") # true
24
+ Rejectu.valid?("this should be good too \xe2\x84\xa2") # true
17
25
 
18
- Rejectu.scrub "happy! \xf2\xa4\xb7\xa4" # => "happy! ?"
19
- ```
20
-
21
- ### Limitations
26
+ Rejectu.scrub("happy! \xf2\xa4\xb7\xa4") # => "happy! ?"
22
27
 
23
- - The replacement character for `scrub` currently cannot be configured
28
+ # using a custom replacement character
29
+ Rejectu.scrub("happy! \xf2\xa4\xb7\xa4", ".") # => "happy! ."
30
+ ```
@@ -6,6 +6,7 @@
6
6
 
7
7
  static VALUE mRejectu = Qnil;
8
8
  static VALUE idEncoding, idTo_s;
9
+ static VALUE defaultToken = Qnil;
9
10
 
10
11
  #ifdef __SSE2__
11
12
  static inline int
@@ -125,11 +126,12 @@ is_valid(VALUE self, VALUE str)
125
126
  }
126
127
 
127
128
  static VALUE
128
- do_scrub(VALUE str)
129
+ do_scrub(VALUE str, VALUE rplToken)
129
130
  {
130
131
  VALUE out_str;
131
132
  unsigned char *p, *end, *out_start, *out;
132
133
  long len, out_len;
134
+ char token = StringValueCStr(rplToken)[0];
133
135
 
134
136
  validate_utf8_input(str);
135
137
 
@@ -151,7 +153,7 @@ do_scrub(VALUE str)
151
153
  } else {
152
154
  p += 4;
153
155
  }
154
- *out++ = '?';
156
+ *out++ = token;
155
157
  } else {
156
158
  *out++ = *p++;
157
159
  }
@@ -165,34 +167,52 @@ do_scrub(VALUE str)
165
167
  }
166
168
 
167
169
  static VALUE
168
- scrub(VALUE self, VALUE str)
170
+ scrub(int argc, VALUE *argv, VALUE self)
169
171
  {
170
- if (is_valid(self, str) == Qtrue) {
171
- return rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_utf8_encoding());
172
+ VALUE input, token;
173
+ rb_scan_args(argc, argv, "11", &input, &token);
174
+
175
+ if (is_valid(self, input) == Qtrue) {
176
+ return input;
177
+ }
178
+
179
+ if (token == Qnil) {
180
+ token = defaultToken;
172
181
  }
173
- return do_scrub(str);
182
+
183
+ return do_scrub(input, token);
174
184
  }
175
185
 
176
186
  static VALUE
177
- scrub_bang(VALUE self, VALUE str)
187
+ scrub_bang(int argc, VALUE *argv, VALUE self)
178
188
  {
179
- VALUE repl;
180
- if (is_valid(self, str) == Qtrue) {
181
- return str;
189
+ VALUE input, token;
190
+ rb_scan_args(argc, argv, "11", &input, &token);
191
+
192
+ if (!is_valid(self, input)) {
193
+ if (token == Qnil) {
194
+ token = defaultToken;
195
+ }
196
+
197
+ VALUE repl = do_scrub(input, token);
198
+ if (!NIL_P(repl)) {
199
+ rb_str_replace(input, repl);
200
+ }
182
201
  }
183
- repl = do_scrub(str);
184
- if (!NIL_P(repl)) rb_str_replace(str, repl);
185
- return str;
202
+
203
+ return input;
186
204
  }
187
205
 
188
206
  void
189
207
  Init_rejectu()
190
208
  {
191
209
  mRejectu = rb_define_module("Rejectu");
210
+ defaultToken = rb_str_new2("?");
211
+ rb_global_variable(&defaultToken);
192
212
 
193
213
  rb_define_singleton_method(mRejectu, "valid?", is_valid, 1);
194
- rb_define_singleton_method(mRejectu, "scrub", scrub, 1);
195
- rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, 1);
214
+ rb_define_singleton_method(mRejectu, "scrub", scrub, -1);
215
+ rb_define_singleton_method(mRejectu, "scrub!", scrub_bang, -1);
196
216
 
197
217
  idEncoding = rb_intern("encoding");
198
218
  idTo_s = rb_intern("to_s");
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'rejectu'
3
- s.version = '0.0.2'
3
+ s.version = '0.1.0'
4
4
  s.summary = 'Detects if a UTF-8 string supplementary plane code points'
5
5
  s.description = <<-DOC
6
6
  This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
@@ -14,4 +14,5 @@ Gem::Specification.new do |s|
14
14
  s.files = `git ls-files`.split("\n")
15
15
  s.extensions = ['ext/rejectu/extconf.rb']
16
16
  s.add_development_dependency 'rake-compiler', '~> 0.9'
17
+ s.add_development_dependency 'test-unit', '~> 3.1'
17
18
  end
@@ -77,10 +77,19 @@ class TestRejectu < Test::Unit::TestCase
77
77
  assert_equal "? test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string")
78
78
  end
79
79
 
80
+ def test_scrub_with_custom_token
81
+ assert_equal ". test string", Rejectu.scrub("\xf2\xa4\xb7\xa4 test string", ".")
82
+ end
83
+
80
84
  def test_scrub!
81
85
  s = "\xf2\xa4\xb7\xa4 test string"
82
86
  assert_equal "? test string", Rejectu.scrub!(s)
83
87
  assert_equal "? test string", s
84
88
  end
85
89
 
90
+ def test_scrub_with_custom_token!
91
+ s = "\xf2\xa4\xb7\xa4 test string"
92
+ assert_equal ". test string", Rejectu.scrub!(s, ".")
93
+ assert_equal ". test string", s
94
+ end
86
95
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rejectu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Francis
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-05 00:00:00.000000000 Z
11
+ date: 2015-10-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: test-unit
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
27
41
  description: |2
28
42
  This gem detects if a UTF-8 encoded string contains characters from the UTF-8 supplementary
29
43
  plane (code points >= U+10000).
@@ -63,7 +77,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
63
77
  version: '0'
64
78
  requirements: []
65
79
  rubyforge_project:
66
- rubygems_version: 2.2.2
80
+ rubygems_version: 2.2.3
67
81
  signing_key:
68
82
  specification_version: 4
69
83
  summary: Detects if a UTF-8 string supplementary plane code points