utf8 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx
6
+ - ree
@@ -5,6 +5,7 @@ require 'benchmark'
5
5
  require 'rubygems'
6
6
  require 'active_support'
7
7
 
8
+ $KCODE = 'UTF8'
8
9
 
9
10
  raw = File.read(File.expand_path('../test.txt', __FILE__))
10
11
  utf8 = raw.as_utf8
@@ -29,9 +30,15 @@ Benchmark.bmbm { |x|
29
30
  x.report("#[-start, len]") {
30
31
  times.times {utf8[-1024, 1024]}
31
32
  }
33
+ x.report("#clean") {
34
+ times.times {utf8.clean}
35
+ }
36
+ x.report("#valid?") {
37
+ times.times {utf8.valid?}
38
+ }
32
39
  }
33
40
 
34
- puts "\n\nActiveSupport::Multibyte::Chars"
41
+ puts "\n\nActiveSupport::Multibyte"
35
42
  Benchmark.bmbm { |x|
36
43
  x.report("#length") {
37
44
  times.times {as_mb.length}
@@ -48,4 +55,35 @@ Benchmark.bmbm { |x|
48
55
  x.report("#[-start, len]") {
49
56
  times.times {as_mb[-1024, 1024]}
50
57
  }
58
+ x.report("ActiveSupport::Multibyte.clean") {
59
+ times.times {ActiveSupport::Multibyte.clean(raw)}
60
+ }
61
+ x.report("ActiveSupport::Multibyte.verify") {
62
+ times.times {ActiveSupport::Multibyte.verify(raw)}
63
+ }
64
+ }
65
+
66
+ require 'iconv'
67
+ module ActiveSupport::Multibyte
68
+ class << self
69
+ OUTSIDE_ASCII = /[^\x00-\x7f]/n
70
+ ICONV_CLEANER = Iconv.new('UTF-8//IGNORE', 'UTF-8')
71
+
72
+ def clean_with_iconv(string)
73
+ if string =~ OUTSIDE_ASCII
74
+ ICONV_CLEANER.iconv(string + ' ')[0..-2]
75
+ else
76
+ string
77
+ end
78
+ end
79
+
80
+ alias_method :clean_without_iconv, :clean
81
+ alias_method :clean, :clean_with_iconv
82
+ end
83
+ end
84
+ puts "\n\nActiveSupport::Multibyte (patched with Iconv)"
85
+ Benchmark.bmbm { |x|
86
+ x.report("ActiveSupport::Multibyte.clean") {
87
+ times.times {ActiveSupport::Multibyte.clean(raw)}
88
+ }
51
89
  }
data/ext/utf8/extconf.rb CHANGED
@@ -4,4 +4,4 @@ require 'rbconfig'
4
4
  $CFLAGS << ' -Wall -funroll-loops'
5
5
  $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
6
6
 
7
- create_makefile("utf8")
7
+ create_makefile("utf8/utf8")
@@ -93,7 +93,7 @@ static VALUE rb_cString_UTF8_each_codepoint(int argc, VALUE *argv, VALUE self) {
93
93
  /*
94
94
  * call-seq: valid?(max_codepoint=nil)
95
95
  *
96
- * Iterates over the string, yielding one UTF-8 codepoint at a time
96
+ * Iterates over the string, returning true if it's valid UTF-8 and false otherwise
97
97
  *
98
98
  * max_codepoint - an optional Fixnum used to declare this string invalid
99
99
  * if a codepoint higher than that value is found
@@ -337,44 +337,73 @@ static VALUE rb_cString_UTF8_slice(int argc, VALUE *argv, VALUE self) {
337
337
  * Returns: a new String
338
338
  */
339
339
  static VALUE rb_cString_UTF8_clean(VALUE self) {
340
- unsigned char *str;
341
- unsigned char *out;
342
- unsigned char replace;
343
- size_t len;
340
+ unsigned char *inBuf, *inBufCur;
341
+ unsigned char *outBuf, *outBufCur;
342
+ size_t len, i;
344
343
  int8_t curCharLen;
345
- size_t i;
346
344
  VALUE rb_out;
347
345
 
348
- str = (unsigned char *)RSTRING_PTR(self);
346
+ inBuf = (unsigned char *)RSTRING_PTR(self);
347
+ inBufCur = inBuf;
349
348
  len = RSTRING_LEN(self);
350
- replace = REPLACEMENT_CHAR;
351
- out = xmalloc(len);
349
+ outBuf = malloc(len);
350
+ outBufCur = outBuf;
352
351
 
353
352
  for(i=0; i<len; i+=curCharLen) {
354
- curCharLen = utf8CharLen(str+i, len);
353
+ curCharLen = utf8CharLen(inBufCur, len);
355
354
  if (curCharLen < 0) {
356
- *(out+i) = replace;
355
+ if (inBufCur-inBuf > 0) {
356
+ memcpy(outBufCur, inBuf, inBufCur-inBuf);
357
+ outBufCur += inBufCur-inBuf;
358
+ }
359
+ *outBufCur++ = REPLACEMENT_CHAR;
360
+ inBuf += (inBufCur-inBuf)+1;
357
361
  curCharLen = 1;
358
- } else {
359
- memcpy(out+i, str+i, curCharLen);
360
362
  }
363
+
364
+ inBufCur += curCharLen;
365
+ }
366
+
367
+ if (inBufCur-inBuf > 0) {
368
+ memcpy(outBufCur, inBuf, inBufCur-inBuf);
361
369
  }
362
370
 
363
- rb_out = rb_str_new((const char*)out, len);
371
+ rb_out = rb_str_new((const char*)outBuf, len);
364
372
  AS_UTF8(rb_out);
365
373
 
366
- xfree(out);
374
+ free(outBuf);
367
375
 
368
376
  return rb_out;
369
377
  }
370
378
 
379
+ /*
380
+ * call-seq: ascii_only?
381
+ *
382
+ * Iterates over the string, returning true if every byte is within the low ASCII range and false otherwise
383
+ *
384
+ * Returns: a Boolean - true if the string is within the low ASCII range, false if not
385
+ */
386
+ static VALUE rb_cString_UTF8_ascii_only(VALUE self) {
387
+ unsigned char *str = (unsigned char *)RSTRING_PTR(self);
388
+ size_t len = RSTRING_LEN(self), i=0;
389
+
390
+ for(; i<len; i+=1) {
391
+ if (str[i] > 0x7f) {
392
+ return Qfalse;
393
+ }
394
+ }
395
+
396
+ return Qtrue;
397
+ }
398
+
371
399
  void init_String_UTF8() {
372
400
  VALUE rb_cString_UTF8 = rb_define_class_under(rb_cString, "UTF8", rb_cString);
373
401
 
374
- rb_define_method(rb_cString_UTF8, "length", rb_cString_UTF8_length, 0);
402
+ rb_define_method(rb_cString_UTF8, "length", rb_cString_UTF8_length, 0);
375
403
  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, -1);
376
- rb_define_method(rb_cString_UTF8, "[]", rb_cString_UTF8_slice, -1);
404
+ rb_define_method(rb_cString_UTF8, "[]", rb_cString_UTF8_slice, -1);
377
405
  rb_define_method(rb_cString_UTF8, "each_codepoint", rb_cString_UTF8_each_codepoint, -1);
378
406
  rb_define_method(rb_cString_UTF8, "valid?", rb_cString_UTF8_valid, -1);
379
407
  rb_define_method(rb_cString_UTF8, "clean", rb_cString_UTF8_clean, 0);
408
+ rb_define_method(rb_cString_UTF8, "ascii_only?", rb_cString_UTF8_ascii_only, 0);
380
409
  }
data/ext/utf8/utf8.c CHANGED
@@ -7,7 +7,7 @@
7
7
  * Scans the current position of the buffer
8
8
  * returning the length of this UTF-8 character
9
9
  */
10
- inline int8_t utf8CharLen(unsigned char *in, size_t in_len) {
10
+ int8_t utf8CharLen(unsigned char *in, size_t in_len) {
11
11
  if (in_len > 0) {
12
12
  unsigned char curChar, *start;
13
13
 
data/ext/utf8/utf8.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef UTF8_UTF8_H
2
2
  #define UTF8_UTF8_H
3
3
 
4
- inline int8_t utf8CharLen(unsigned char *in, size_t in_len);
4
+ int8_t utf8CharLen(unsigned char *in, size_t in_len);
5
5
  int64_t utf8CharCount(unsigned char *in, size_t in_len);
6
6
  int32_t utf8CharToCodepoint(unsigned char *in, size_t in_len);
7
7
 
data/lib/utf8/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class String
2
2
  class UTF8 < ::String
3
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
4
4
  end
5
5
  end
data/spec/spec_helper.rb CHANGED
@@ -2,4 +2,9 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'utf8'
3
3
  require 'utf8/string_scanner'
4
4
 
5
- require 'rspec' unless defined? Rspec
5
+ require 'rspec' unless defined? RSpec
6
+
7
+ RSpec.configure do |config|
8
+ config.expect_with :stdlib
9
+ config.alias_example_to :test
10
+ end
@@ -2,13 +2,13 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe StringScanner::UTF8 do
5
- before(:each) do
5
+ before :each do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @scanner = StringScanner.new(@char_array.join)
8
8
  @utf8_scanner = @scanner.as_utf8
9
9
  end
10
10
 
11
- it "should blow up on invalid utf8 chars" do
11
+ test "should blow up on invalid utf8 chars" do
12
12
  # let's cut right into the middle of a sequence so we know it's bad
13
13
  str = @char_array.join
14
14
  str.force_encoding('binary') if str.respond_to?(:force_encoding)
@@ -16,34 +16,34 @@ describe StringScanner::UTF8 do
16
16
  str.force_encoding('utf-8') if str.respond_to?(:force_encoding)
17
17
  scanner = StringScanner.new(str).as_utf8
18
18
 
19
- lambda {
19
+ assert_raise ArgumentError do
20
20
  scanner.getch
21
- }.should raise_error(ArgumentError)
21
+ end
22
22
  end
23
23
 
24
- it "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
25
- @scanner.should respond_to(:as_utf8)
26
- @scanner.as_utf8.class.should eql(StringScanner::UTF8)
24
+ test "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
25
+ assert @scanner.respond_to?(:as_utf8)
26
+ assert_equal StringScanner::UTF8, @scanner.as_utf8.class
27
27
  end
28
28
 
29
- it "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
29
+ test "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
30
30
  raw = @utf8_scanner.as_raw
31
- raw.class.should eql(StringScanner)
32
- raw.string.should eql(@utf8_scanner.string)
31
+ assert_equal StringScanner, raw.class
32
+ assert_equal @utf8_scanner.string, raw.string
33
33
  end
34
34
 
35
- it "#getch should be utf8-aware" do
35
+ test "#getch should be utf8-aware" do
36
36
  i=0
37
37
  while char = @utf8_scanner.getch
38
- char.should eql(@char_array[i])
38
+ assert_equal @char_array[i], char
39
39
  i+=1
40
40
  end
41
41
  end
42
42
 
43
- it "should be able to be reset" do
43
+ test "should be able to be reset" do
44
44
  i=0
45
45
  while char = @utf8_scanner.getch
46
- char.should eql(@char_array[i])
46
+ assert_equal @char_array[i], char
47
47
  if i == 4
48
48
  break
49
49
  end
@@ -54,7 +54,7 @@ describe StringScanner::UTF8 do
54
54
 
55
55
  i=0
56
56
  while char = @utf8_scanner.getch
57
- char.should eql(@char_array[i])
57
+ assert_equal @char_array[i], char
58
58
  i+=1
59
59
  end
60
60
  end
data/spec/string_spec.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe String::UTF8 do
5
- before(:each) do
5
+ before :each do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @str = @char_array.join
8
8
  @utf8 = @str.as_utf8
@@ -10,65 +10,69 @@ describe String::UTF8 do
10
10
  @codepoints = @char_array.map{|c| c.unpack('U').first}
11
11
  end
12
12
 
13
- it "should blow up on invalid utf8 chars" do
13
+ test "should blow up on invalid utf8 chars" do
14
14
  # let's cut right into the middle of a sequence so we know it's bad
15
15
  @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
16
16
  utf8 = @str[0..1]
17
17
  utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
18
18
  utf8 = utf8.as_utf8
19
19
 
20
- lambda {
20
+ assert_raise ArgumentError do
21
21
  utf8.length
22
- }.should raise_error(ArgumentError)
22
+ end
23
23
 
24
- lambda {
24
+ assert_raise ArgumentError do
25
25
  utf8[0, 10]
26
- }.should raise_error(ArgumentError)
26
+ end
27
27
 
28
- lambda {
28
+ assert_raise ArgumentError do
29
29
  utf8.chars.to_a
30
- }.should raise_error(ArgumentError)
30
+ end
31
31
  end
32
32
 
33
- it "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
34
- "".should respond_to(:as_utf8)
35
- "".as_utf8.class.should eql(String::UTF8)
33
+ test "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
34
+ assert "".respond_to?(:as_utf8)
35
+ assert_equal String::UTF8, "".as_utf8.class
36
36
  end
37
37
 
38
- it "should allow access to the underlying raw string" do
38
+ test "should allow access to the underlying raw string" do
39
39
  raw = @utf8.as_raw
40
- raw.class.should eql(String)
40
+ assert_equal String, raw.class
41
41
  if defined? Encoding
42
- raw.length.should eql(@utf8_len)
42
+ assert_equal @utf8_len, raw.length
43
43
  else
44
- raw.length.should eql(@str.size)
44
+ assert_equal @str.size, raw.length
45
45
  end
46
46
  end
47
47
 
48
- it "should wrap all returned strings to be utf8-aware" do
49
- @utf8[0].class.should eql(String::UTF8)
50
- @utf8.chars.to_a[0].class.should eql(String::UTF8)
48
+ test "should wrap all returned strings to be utf8-aware" do
49
+ assert_equal String::UTF8, @utf8[0].class
50
+ assert_equal String::UTF8, @utf8.chars.to_a[0].class
51
51
  end
52
52
 
53
- it "clean should replace invalid utf8 chars with '?'" do
53
+ test "clean should replace invalid utf8 chars with '?'" do
54
54
  orig = "provided by Cristian Rodr\355guez."
55
55
  clean = "provided by Cristian Rodr?guez."
56
- orig.as_utf8.clean.should eql(clean)
56
+ assert_equal clean, orig.as_utf8.clean
57
+ assert_equal "asdf24??asdf24", "asdf24\206\222asdf24".as_utf8.clean
58
+ assert_equal "asdf24?asdf24", "asdf24\342asdf24".as_utf8.clean
59
+ assert_equal "asdf24??asdf24", "asdf24\342\206asdf24".as_utf8.clean
60
+ assert_equal "asdf24?asdf24", "asdf24\222asdf24".as_utf8.clean
57
61
  end
58
62
 
59
- it "clean should not replace valid utf8 chars with '?'" do
60
- '→'.as_utf8.clean.should eql('→')
63
+ test "clean should not replace valid utf8 chars with '?'" do
64
+ assert_equal "asdf24\342\206\222asdf24", "asdf24\342\206\222asdf24".as_utf8.clean
61
65
  end
62
66
 
63
67
  context "#length and #size" do
64
- it "should be utf8-aware" do
65
- @utf8.length.should eql(@utf8_len)
66
- @utf8.size.should eql(@utf8_len)
68
+ test "should be utf8-aware" do
69
+ assert_equal @utf8_len, @utf8.length
70
+ assert_equal @utf8_len, @utf8.size
67
71
  end
68
72
  end
69
73
 
70
74
  context "#chars and #each_char" do
71
- it "should be utf8-aware" do
75
+ test "should be utf8-aware" do
72
76
  klass = begin
73
77
  if defined? Encoding
74
78
  Enumerator
@@ -77,19 +81,19 @@ describe String::UTF8 do
77
81
  end
78
82
  end
79
83
 
80
- @utf8.chars.class.should eql(klass)
84
+ assert_equal klass, @utf8.chars.class
81
85
  @utf8.chars do |char|
82
- char.should_not be_nil
86
+ assert !char.nil?
83
87
  end
84
88
  joined = @utf8.chars.to_a.join
85
- @utf8.should eql(joined)
86
- @utf8.chars.to_a.size.should eql(@utf8_len)
87
- @utf8.chars.to_a.should eql(@char_array)
89
+ assert_equal joined, @utf8
90
+ assert_equal @utf8_len, @utf8.chars.to_a.size
91
+ assert_equal @char_array, @utf8.chars.to_a
88
92
  end
89
93
  end
90
94
 
91
95
  context "#codepoints and #each_codepoint" do
92
- it "should be utf8-aware" do
96
+ test "should be utf8-aware" do
93
97
  klass = begin
94
98
  if defined? Encoding
95
99
  Enumerator
@@ -98,124 +102,118 @@ describe String::UTF8 do
98
102
  end
99
103
  end
100
104
 
101
- @utf8.codepoints.class.should eql(klass)
105
+ assert_equal klass, @utf8.codepoints.class
102
106
  @utf8.codepoints do |codepoint|
103
- codepoint.should_not be_nil
107
+ assert !codepoint.nil?
104
108
  end
105
- @utf8.codepoints.to_a.size.should eql(@codepoints.size)
106
- @utf8.codepoints.to_a.should eql(@codepoints)
109
+ assert_equal @codepoints.size, @utf8.codepoints.to_a.size
110
+ assert_equal @codepoints, @utf8.codepoints.to_a
107
111
  end
108
112
  end
109
113
 
110
114
  context "[offset] syntax" do
111
- it "should be utf8-aware" do
115
+ test "should be utf8-aware" do
112
116
  @char_array.each_with_index do |char, i|
113
117
  utf8_char = @utf8[i]
114
- utf8_char.should eql(char)
118
+ assert_equal char, utf8_char
115
119
  end
116
120
  end
117
121
 
118
- it "should support negative indices" do
122
+ test "should support negative indices" do
119
123
  utf8_char = @utf8[-5]
120
- utf8_char.should eql(@char_array[-5])
124
+ assert_equal @char_array[-5], utf8_char
121
125
  end
122
126
 
123
- it "should return nil for out of range indices" do
124
- @utf8[100].should be_nil
125
- @utf8[-100].should be_nil
127
+ test "should return nil for out of range indices" do
128
+ assert @utf8[100].nil?
129
+ assert @utf8[-100].nil?
126
130
  end
127
131
  end
128
132
 
129
133
  context "[offset, length] syntax" do
130
- it "should be utf8-aware" do
131
- utf8_char = @utf8[1, 4]
132
- utf8_char.should eql(@char_array[1, 4].join)
133
-
134
- utf8_char = @utf8[0, 6]
135
- utf8_char.should eql(@char_array[0, 6].join)
134
+ test "should be utf8-aware" do
135
+ assert_equal @char_array[1, 4].join, @utf8[1, 4]
136
+ assert_equal @char_array[0, 6].join, @utf8[0, 6]
136
137
 
137
138
  # this will fail due to a bug in 1.9
138
139
  unless defined? Encoding
139
- utf8_char = @utf8[6, 100]
140
- utf8_char.should eql(@char_array[6, 100].join)
140
+ assert_equal @char_array[6, 100].join, @utf8[6, 100]
141
141
  end
142
142
 
143
- utf8_char = @utf8[-1, 2]
144
- utf8_char.should eql(@char_array[-1, 2].join)
145
-
146
- utf8_char = @utf8[-1, 100]
147
- utf8_char.should eql(@char_array[-1, 100].join)
148
-
149
- utf8_char = @utf8[0, 0]
150
- utf8_char.should eql(@char_array[0, 0].join)
143
+ assert_equal @char_array[-1, 2].join, @utf8[-1, 2]
144
+ assert_equal @char_array[-1, 100].join, @utf8[-1, 100]
145
+ assert_equal @char_array[0, 0].join, @utf8[0, 0]
151
146
  end
152
147
 
153
- it "should return nil for an out of range offset or length" do
154
- @utf8[100, 100].should be_nil
155
- @utf8[-100, 100].should be_nil
156
- @utf8[0, -100].should be_nil
148
+ test "should return nil for an out of range offset or length" do
149
+ assert @utf8[100, 100].nil?
150
+ assert @utf8[-100, 100].nil?
151
+ assert @utf8[0, -100].nil?
157
152
  end
158
153
  end
159
154
 
160
155
  context "[Range] syntax" do
161
- it "should be utf8-aware" do
162
- utf8_char = @utf8[1..4]
163
- utf8_char.should eql(@char_array[1..4].join)
164
-
165
- utf8_char = @utf8[0..6]
166
- utf8_char.should eql(@char_array[0..6].join)
156
+ test "should be utf8-aware" do
157
+ assert_equal @char_array[1..4].join, @utf8[1..4]
158
+ assert_equal @char_array[0..6].join, @utf8[0..6]
167
159
 
168
160
  # this will fail due to a bug in 1.9
169
161
  unless defined? Encoding
170
- utf8_char = @utf8[6..100]
171
- utf8_char.should eql(@char_array[6..100].join)
162
+ assert_equal @char_array[6..100].join, @utf8[6..100]
172
163
  end
173
164
 
174
- utf8_char = @utf8[-1..2]
175
- utf8_char.should eql(@char_array[-1..2].join)
176
-
177
- utf8_char = @utf8[-1..100]
178
- utf8_char.should eql(@char_array[-1..100].join)
165
+ assert_equal @char_array[-1..2].join, @utf8[-1..2]
166
+ assert_equal @char_array[-1..100].join, @utf8[-1..100]
179
167
  end
180
168
 
181
- it "should return nil for an out of range offset or length" do
182
- @utf8[100..100].should be_nil
183
- @utf8[-100..100].should be_nil
184
- @utf8[0..-100].should eql("")
169
+ test "should return nil for an out of range offset or length" do
170
+ assert @utf8[100..100].nil?
171
+ assert @utf8[-100..100].nil?
172
+ assert_equal "", @utf8[0..-100]
185
173
  end
186
174
  end
187
175
 
188
176
  context "#valid?" do
189
- it "should test validity" do
177
+ test "should test validity" do
190
178
  # let's cut right into the middle of a sequence so we know it's bad
191
179
  @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
192
180
  utf8 = @str[0..1]
193
181
  utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
194
182
  utf8 = utf8.as_utf8
195
183
 
196
- utf8.valid?.should be_false
197
- @utf8.valid?.should be_true
184
+ assert !utf8.valid?
185
+ assert @utf8.valid?
198
186
 
199
- "provided by Cristian Rodr\355guez.".as_utf8.should_not be_valid
187
+ assert !"provided by Cristian Rodr\355guez.".as_utf8.valid?
200
188
  end
201
189
 
202
- it "should test validity using a maximum codepoint" do
190
+ test "should test validity using a maximum codepoint" do
203
191
  highest_codepoint = @utf8.codepoints.to_a.max
204
192
 
205
- @utf8.valid?(highest_codepoint).should be_true
206
- @utf8.valid?(highest_codepoint-1).should be_false
193
+ assert @utf8.valid?(highest_codepoint)
194
+ assert !@utf8.valid?(highest_codepoint-1)
207
195
  end
208
196
  end
209
197
 
210
- it "[Regexp] syntax shouldn't be supported yet" do
211
- lambda {
198
+ test "[Regexp] syntax shouldn't be supported yet" do
199
+ assert_raise ArgumentError do
212
200
  @utf8[/a/]
213
- }.should raise_error(ArgumentError)
201
+ end
214
202
  end
215
203
 
216
- it "[Regexp, match_index] syntax shouldn't be supported yet" do
217
- lambda {
204
+ test "[Regexp, match_index] syntax shouldn't be supported yet" do
205
+ assert_raise ArgumentError do
218
206
  @utf8[/(a)/, 1]
219
- }.should raise_error(ArgumentError)
207
+ end
208
+ end
209
+
210
+ context "#ascii_only" do
211
+ test "should return true for a string within the low ascii range" do
212
+ assert "asdf".as_utf8.ascii_only?
213
+ end
214
+
215
+ test "should return false for a string within the low ascii range" do
216
+ assert !@char_array.first.as_utf8.ascii_only?
217
+ end
220
218
  end
221
219
  end
data/utf8.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.files = `git ls-files`.split("\n")
14
14
  s.homepage = %q{http://github.com/brianmario/utf8}
15
15
  s.rdoc_options = ["--charset=UTF-8"]
16
- s.require_paths = ["lib", "ext"]
16
+ s.require_paths = ["lib"]
17
17
  s.rubygems_version = %q{1.4.2}
18
18
  s.summary = %q{A lightweight UTF8-aware String class meant for use with Ruby 1.8}
19
19
  s.test_files = `git ls-files spec`.split("\n")
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 6
10
- version: 0.1.6
9
+ - 7
10
+ version: 0.1.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Lopez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-25 00:00:00 -04:00
19
- default_executable:
18
+ date: 2011-11-22 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rake-compiler
@@ -75,6 +74,7 @@ extra_rdoc_files:
75
74
  files:
76
75
  - .gitignore
77
76
  - .rspec
77
+ - .travis.yml
78
78
  - Gemfile
79
79
  - MIT-LICENSE
80
80
  - README.rdoc
@@ -98,7 +98,6 @@ files:
98
98
  - spec/string_scanner_spec.rb
99
99
  - spec/string_spec.rb
100
100
  - utf8.gemspec
101
- has_rdoc: true
102
101
  homepage: http://github.com/brianmario/utf8
103
102
  licenses: []
104
103
 
@@ -107,7 +106,6 @@ rdoc_options:
107
106
  - --charset=UTF-8
108
107
  require_paths:
109
108
  - lib
110
- - ext
111
109
  required_ruby_version: !ruby/object:Gem::Requirement
112
110
  none: false
113
111
  requirements:
@@ -129,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
127
  requirements: []
130
128
 
131
129
  rubyforge_project:
132
- rubygems_version: 1.6.2
130
+ rubygems_version: 1.8.11
133
131
  signing_key:
134
132
  specification_version: 3
135
133
  summary: A lightweight UTF8-aware String class meant for use with Ruby 1.8