utf8 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - rbx
6
+ - ree
@@ -5,6 +5,7 @@ require 'benchmark'
5
5
  require 'rubygems'
6
6
  require 'active_support'
7
7
 
8
+ $KCODE = 'UTF8'
8
9
 
9
10
  raw = File.read(File.expand_path('../test.txt', __FILE__))
10
11
  utf8 = raw.as_utf8
@@ -29,9 +30,15 @@ Benchmark.bmbm { |x|
29
30
  x.report("#[-start, len]") {
30
31
  times.times {utf8[-1024, 1024]}
31
32
  }
33
+ x.report("#clean") {
34
+ times.times {utf8.clean}
35
+ }
36
+ x.report("#valid?") {
37
+ times.times {utf8.valid?}
38
+ }
32
39
  }
33
40
 
34
- puts "\n\nActiveSupport::Multibyte::Chars"
41
+ puts "\n\nActiveSupport::Multibyte"
35
42
  Benchmark.bmbm { |x|
36
43
  x.report("#length") {
37
44
  times.times {as_mb.length}
@@ -48,4 +55,35 @@ Benchmark.bmbm { |x|
48
55
  x.report("#[-start, len]") {
49
56
  times.times {as_mb[-1024, 1024]}
50
57
  }
58
+ x.report("ActiveSupport::Multibyte.clean") {
59
+ times.times {ActiveSupport::Multibyte.clean(raw)}
60
+ }
61
+ x.report("ActiveSupport::Multibyte.verify") {
62
+ times.times {ActiveSupport::Multibyte.verify(raw)}
63
+ }
64
+ }
65
+
66
+ require 'iconv'
67
+ module ActiveSupport::Multibyte
68
+ class << self
69
+ OUTSIDE_ASCII = /[^\x00-\x7f]/n
70
+ ICONV_CLEANER = Iconv.new('UTF-8//IGNORE', 'UTF-8')
71
+
72
+ def clean_with_iconv(string)
73
+ if string =~ OUTSIDE_ASCII
74
+ ICONV_CLEANER.iconv(string + ' ')[0..-2]
75
+ else
76
+ string
77
+ end
78
+ end
79
+
80
+ alias_method :clean_without_iconv, :clean
81
+ alias_method :clean, :clean_with_iconv
82
+ end
83
+ end
84
+ puts "\n\nActiveSupport::Multibyte (patched with Iconv)"
85
+ Benchmark.bmbm { |x|
86
+ x.report("ActiveSupport::Multibyte.clean") {
87
+ times.times {ActiveSupport::Multibyte.clean(raw)}
88
+ }
51
89
  }
data/ext/utf8/extconf.rb CHANGED
@@ -4,4 +4,4 @@ require 'rbconfig'
4
4
  $CFLAGS << ' -Wall -funroll-loops'
5
5
  $CFLAGS << ' -Wextra -O0 -ggdb3' if ENV['DEBUG']
6
6
 
7
- create_makefile("utf8")
7
+ create_makefile("utf8/utf8")
@@ -93,7 +93,7 @@ static VALUE rb_cString_UTF8_each_codepoint(int argc, VALUE *argv, VALUE self) {
93
93
  /*
94
94
  * call-seq: valid?(max_codepoint=nil)
95
95
  *
96
- * Iterates over the string, yielding one UTF-8 codepoint at a time
96
+ * Iterates over the string, returning true/false if it's valid UTF-8
97
97
  *
98
98
  * max_codepoint - an optional Fixnum used to declare this string invalid
99
99
  * if a codepoint higher than that value is found
@@ -337,44 +337,73 @@ static VALUE rb_cString_UTF8_slice(int argc, VALUE *argv, VALUE self) {
337
337
  * Returns: a new String
338
338
  */
339
339
  static VALUE rb_cString_UTF8_clean(VALUE self) {
340
- unsigned char *str;
341
- unsigned char *out;
342
- unsigned char replace;
343
- size_t len;
340
+ unsigned char *inBuf, *inBufCur;
341
+ unsigned char *outBuf, *outBufCur;
342
+ size_t len, i;
344
343
  int8_t curCharLen;
345
- size_t i;
346
344
  VALUE rb_out;
347
345
 
348
- str = (unsigned char *)RSTRING_PTR(self);
346
+ inBuf = (unsigned char *)RSTRING_PTR(self);
347
+ inBufCur = inBuf;
349
348
  len = RSTRING_LEN(self);
350
- replace = REPLACEMENT_CHAR;
351
- out = xmalloc(len);
349
+ outBuf = malloc(len);
350
+ outBufCur = outBuf;
352
351
 
353
352
  for(i=0; i<len; i+=curCharLen) {
354
- curCharLen = utf8CharLen(str+i, len);
353
+ curCharLen = utf8CharLen(inBufCur, len);
355
354
  if (curCharLen < 0) {
356
- *(out+i) = replace;
355
+ if (inBufCur-inBuf > 0) {
356
+ memcpy(outBufCur, inBuf, inBufCur-inBuf);
357
+ outBufCur += inBufCur-inBuf;
358
+ }
359
+ *outBufCur++ = REPLACEMENT_CHAR;
360
+ inBuf += (inBufCur-inBuf)+1;
357
361
  curCharLen = 1;
358
- } else {
359
- memcpy(out+i, str+i, curCharLen);
360
362
  }
363
+
364
+ inBufCur += curCharLen;
365
+ }
366
+
367
+ if (inBufCur-inBuf > 0) {
368
+ memcpy(outBufCur, inBuf, inBufCur-inBuf);
361
369
  }
362
370
 
363
- rb_out = rb_str_new((const char*)out, len);
371
+ rb_out = rb_str_new((const char*)outBuf, len);
364
372
  AS_UTF8(rb_out);
365
373
 
366
- xfree(out);
374
+ free(outBuf);
367
375
 
368
376
  return rb_out;
369
377
  }
370
378
 
379
+ /*
380
+ * call-seq: ascii_only?
381
+ *
382
+ * Iterates over the string, returning true/false if it's within the low ASCII range
383
+ *
384
+ * Returns: a Boolean - true if the string is within the low ASCII range, false if not
385
+ */
386
+ static VALUE rb_cString_UTF8_ascii_only(VALUE self) {
387
+ unsigned char *str = (unsigned char *)RSTRING_PTR(self);
388
+ size_t len = RSTRING_LEN(self), i=0;
389
+
390
+ for(; i<len; i+=1) {
391
+ if (str[i] > 0x7f) {
392
+ return Qfalse;
393
+ }
394
+ }
395
+
396
+ return Qtrue;
397
+ }
398
+
371
399
  void init_String_UTF8() {
372
400
  VALUE rb_cString_UTF8 = rb_define_class_under(rb_cString, "UTF8", rb_cString);
373
401
 
374
- rb_define_method(rb_cString_UTF8, "length", rb_cString_UTF8_length, 0);
402
+ rb_define_method(rb_cString_UTF8, "length", rb_cString_UTF8_length, 0);
375
403
  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, -1);
376
- rb_define_method(rb_cString_UTF8, "[]", rb_cString_UTF8_slice, -1);
404
+ rb_define_method(rb_cString_UTF8, "[]", rb_cString_UTF8_slice, -1);
377
405
  rb_define_method(rb_cString_UTF8, "each_codepoint", rb_cString_UTF8_each_codepoint, -1);
378
406
  rb_define_method(rb_cString_UTF8, "valid?", rb_cString_UTF8_valid, -1);
379
407
  rb_define_method(rb_cString_UTF8, "clean", rb_cString_UTF8_clean, 0);
408
+ rb_define_method(rb_cString_UTF8, "ascii_only?", rb_cString_UTF8_ascii_only, 0);
380
409
  }
data/ext/utf8/utf8.c CHANGED
@@ -7,7 +7,7 @@
7
7
  * Scans the current position of the buffer
8
8
  * returning the length of this UTF-8 character
9
9
  */
10
- inline int8_t utf8CharLen(unsigned char *in, size_t in_len) {
10
+ int8_t utf8CharLen(unsigned char *in, size_t in_len) {
11
11
  if (in_len > 0) {
12
12
  unsigned char curChar, *start;
13
13
 
data/ext/utf8/utf8.h CHANGED
@@ -1,7 +1,7 @@
1
1
  #ifndef UTF8_UTF8_H
2
2
  #define UTF8_UTF8_H
3
3
 
4
- inline int8_t utf8CharLen(unsigned char *in, size_t in_len);
4
+ int8_t utf8CharLen(unsigned char *in, size_t in_len);
5
5
  int64_t utf8CharCount(unsigned char *in, size_t in_len);
6
6
  int32_t utf8CharToCodepoint(unsigned char *in, size_t in_len);
7
7
 
data/lib/utf8/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  class String
2
2
  class UTF8 < ::String
3
- VERSION = "0.1.6"
3
+ VERSION = "0.1.7"
4
4
  end
5
5
  end
data/spec/spec_helper.rb CHANGED
@@ -2,4 +2,9 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
2
  require 'utf8'
3
3
  require 'utf8/string_scanner'
4
4
 
5
- require 'rspec' unless defined? Rspec
5
+ require 'rspec' unless defined? RSpec
6
+
7
+ RSpec.configure do |config|
8
+ config.expect_with :stdlib
9
+ config.alias_example_to :test
10
+ end
@@ -2,13 +2,13 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe StringScanner::UTF8 do
5
- before(:each) do
5
+ before :each do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @scanner = StringScanner.new(@char_array.join)
8
8
  @utf8_scanner = @scanner.as_utf8
9
9
  end
10
10
 
11
- it "should blow up on invalid utf8 chars" do
11
+ test "should blow up on invalid utf8 chars" do
12
12
  # lets cut right into the middle of a sequence so we know it's bad
13
13
  str = @char_array.join
14
14
  str.force_encoding('binary') if str.respond_to?(:force_encoding)
@@ -16,34 +16,34 @@ describe StringScanner::UTF8 do
16
16
  str.force_encoding('utf-8') if str.respond_to?(:force_encoding)
17
17
  scanner = StringScanner.new(str).as_utf8
18
18
 
19
- lambda {
19
+ assert_raise ArgumentError do
20
20
  scanner.getch
21
- }.should raise_error(ArgumentError)
21
+ end
22
22
  end
23
23
 
24
- it "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
25
- @scanner.should respond_to(:as_utf8)
26
- @scanner.as_utf8.class.should eql(StringScanner::UTF8)
24
+ test "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
25
+ assert @scanner.respond_to?(:as_utf8)
26
+ assert_equal StringScanner::UTF8, @scanner.as_utf8.class
27
27
  end
28
28
 
29
- it "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
29
+ test "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
30
30
  raw = @utf8_scanner.as_raw
31
- raw.class.should eql(StringScanner)
32
- raw.string.should eql(@utf8_scanner.string)
31
+ assert_equal StringScanner, raw.class
32
+ assert_equal @utf8_scanner.string, raw.string
33
33
  end
34
34
 
35
- it "#getch should be utf8-aware" do
35
+ test "#getch should be utf8-aware" do
36
36
  i=0
37
37
  while char = @utf8_scanner.getch
38
- char.should eql(@char_array[i])
38
+ assert_equal @char_array[i], char
39
39
  i+=1
40
40
  end
41
41
  end
42
42
 
43
- it "should be able to be reset" do
43
+ test "should be able to be reset" do
44
44
  i=0
45
45
  while char = @utf8_scanner.getch
46
- char.should eql(@char_array[i])
46
+ assert_equal @char_array[i], char
47
47
  if i == 4
48
48
  break
49
49
  end
@@ -54,7 +54,7 @@ describe StringScanner::UTF8 do
54
54
 
55
55
  i=0
56
56
  while char = @utf8_scanner.getch
57
- char.should eql(@char_array[i])
57
+ assert_equal @char_array[i], char
58
58
  i+=1
59
59
  end
60
60
  end
data/spec/string_spec.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe String::UTF8 do
5
- before(:each) do
5
+ before :each do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @str = @char_array.join
8
8
  @utf8 = @str.as_utf8
@@ -10,65 +10,69 @@ describe String::UTF8 do
10
10
  @codepoints = @char_array.map{|c| c.unpack('U').first}
11
11
  end
12
12
 
13
- it "should blow up on invalid utf8 chars" do
13
+ test "should blow up on invalid utf8 chars" do
14
14
  # lets cut right into the middle of a sequence so we know it's bad
15
15
  @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
16
16
  utf8 = @str[0..1]
17
17
  utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
18
18
  utf8 = utf8.as_utf8
19
19
 
20
- lambda {
20
+ assert_raise ArgumentError do
21
21
  utf8.length
22
- }.should raise_error(ArgumentError)
22
+ end
23
23
 
24
- lambda {
24
+ assert_raise ArgumentError do
25
25
  utf8[0, 10]
26
- }.should raise_error(ArgumentError)
26
+ end
27
27
 
28
- lambda {
28
+ assert_raise ArgumentError do
29
29
  utf8.chars.to_a
30
- }.should raise_error(ArgumentError)
30
+ end
31
31
  end
32
32
 
33
- it "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
34
- "".should respond_to(:as_utf8)
35
- "".as_utf8.class.should eql(String::UTF8)
33
+ test "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
34
+ assert "".respond_to?(:as_utf8)
35
+ assert_equal String::UTF8, "".as_utf8.class
36
36
  end
37
37
 
38
- it "should allow access to the underlying raw string" do
38
+ test "should allow access to the underlying raw string" do
39
39
  raw = @utf8.as_raw
40
- raw.class.should eql(String)
40
+ assert_equal String, raw.class
41
41
  if defined? Encoding
42
- raw.length.should eql(@utf8_len)
42
+ assert_equal @utf8_len, raw.length
43
43
  else
44
- raw.length.should eql(@str.size)
44
+ assert_equal @str.size, raw.length
45
45
  end
46
46
  end
47
47
 
48
- it "should wrap all returned strings to be utf8-aware" do
49
- @utf8[0].class.should eql(String::UTF8)
50
- @utf8.chars.to_a[0].class.should eql(String::UTF8)
48
+ test "should wrap all returned strings to be utf8-aware" do
49
+ assert_equal String::UTF8, @utf8[0].class
50
+ assert_equal String::UTF8, @utf8.chars.to_a[0].class
51
51
  end
52
52
 
53
- it "clean should replace invalid utf8 chars with '?'" do
53
+ test "clean should replace invalid utf8 chars with '?'" do
54
54
  orig = "provided by Cristian Rodr\355guez."
55
55
  clean = "provided by Cristian Rodr?guez."
56
- orig.as_utf8.clean.should eql(clean)
56
+ assert_equal clean, orig.as_utf8.clean
57
+ assert_equal "asdf24??asdf24", "asdf24\206\222asdf24".as_utf8.clean
58
+ assert_equal "asdf24?asdf24", "asdf24\342asdf24".as_utf8.clean
59
+ assert_equal "asdf24??asdf24", "asdf24\342\206asdf24".as_utf8.clean
60
+ assert_equal "asdf24?asdf24", "asdf24\222asdf24".as_utf8.clean
57
61
  end
58
62
 
59
- it "clean should not replace valid utf8 chars with '?'" do
60
- '→'.as_utf8.clean.should eql('→')
63
+ test "clean should not replace valid utf8 chars with '?'" do
64
+ assert_equal "asdf24\342\206\222asdf24", "asdf24\342\206\222asdf24".as_utf8.clean
61
65
  end
62
66
 
63
67
  context "#length and #size" do
64
- it "should be utf8-aware" do
65
- @utf8.length.should eql(@utf8_len)
66
- @utf8.size.should eql(@utf8_len)
68
+ test "should be utf8-aware" do
69
+ assert_equal @utf8_len, @utf8.length
70
+ assert_equal @utf8_len, @utf8.size
67
71
  end
68
72
  end
69
73
 
70
74
  context "#chars and #each_char" do
71
- it "should be utf8-aware" do
75
+ test "should be utf8-aware" do
72
76
  klass = begin
73
77
  if defined? Encoding
74
78
  Enumerator
@@ -77,19 +81,19 @@ describe String::UTF8 do
77
81
  end
78
82
  end
79
83
 
80
- @utf8.chars.class.should eql(klass)
84
+ assert_equal klass, @utf8.chars.class
81
85
  @utf8.chars do |char|
82
- char.should_not be_nil
86
+ assert !char.nil?
83
87
  end
84
88
  joined = @utf8.chars.to_a.join
85
- @utf8.should eql(joined)
86
- @utf8.chars.to_a.size.should eql(@utf8_len)
87
- @utf8.chars.to_a.should eql(@char_array)
89
+ assert_equal joined, @utf8
90
+ assert_equal @utf8_len, @utf8.chars.to_a.size
91
+ assert_equal @char_array, @utf8.chars.to_a
88
92
  end
89
93
  end
90
94
 
91
95
  context "#codepoints and #each_codepoint" do
92
- it "should be utf8-aware" do
96
+ test "should be utf8-aware" do
93
97
  klass = begin
94
98
  if defined? Encoding
95
99
  Enumerator
@@ -98,124 +102,118 @@ describe String::UTF8 do
98
102
  end
99
103
  end
100
104
 
101
- @utf8.codepoints.class.should eql(klass)
105
+ assert_equal klass, @utf8.codepoints.class
102
106
  @utf8.codepoints do |codepoint|
103
- codepoint.should_not be_nil
107
+ assert !codepoint.nil?
104
108
  end
105
- @utf8.codepoints.to_a.size.should eql(@codepoints.size)
106
- @utf8.codepoints.to_a.should eql(@codepoints)
109
+ assert_equal @codepoints.size, @utf8.codepoints.to_a.size
110
+ assert_equal @codepoints, @utf8.codepoints.to_a
107
111
  end
108
112
  end
109
113
 
110
114
  context "[offset] syntax" do
111
- it "should be utf8-aware" do
115
+ test "should be utf8-aware" do
112
116
  @char_array.each_with_index do |char, i|
113
117
  utf8_char = @utf8[i]
114
- utf8_char.should eql(char)
118
+ assert_equal char, utf8_char
115
119
  end
116
120
  end
117
121
 
118
- it "should support negative indices" do
122
+ test "should support negative indices" do
119
123
  utf8_char = @utf8[-5]
120
- utf8_char.should eql(@char_array[-5])
124
+ assert_equal @char_array[-5], utf8_char
121
125
  end
122
126
 
123
- it "should return nil for out of range indices" do
124
- @utf8[100].should be_nil
125
- @utf8[-100].should be_nil
127
+ test "should return nil for out of range indices" do
128
+ assert @utf8[100].nil?
129
+ assert @utf8[-100].nil?
126
130
  end
127
131
  end
128
132
 
129
133
  context "[offset, length] syntax" do
130
- it "should be utf8-aware" do
131
- utf8_char = @utf8[1, 4]
132
- utf8_char.should eql(@char_array[1, 4].join)
133
-
134
- utf8_char = @utf8[0, 6]
135
- utf8_char.should eql(@char_array[0, 6].join)
134
+ test "should be utf8-aware" do
135
+ assert_equal @char_array[1, 4].join, @utf8[1, 4]
136
+ assert_equal @char_array[0, 6].join, @utf8[0, 6]
136
137
 
137
138
  # this will fail due to a bug in 1.9
138
139
  unless defined? Encoding
139
- utf8_char = @utf8[6, 100]
140
- utf8_char.should eql(@char_array[6, 100].join)
140
+ assert_equal @char_array[6, 100].join, @utf8[6, 100]
141
141
  end
142
142
 
143
- utf8_char = @utf8[-1, 2]
144
- utf8_char.should eql(@char_array[-1, 2].join)
145
-
146
- utf8_char = @utf8[-1, 100]
147
- utf8_char.should eql(@char_array[-1, 100].join)
148
-
149
- utf8_char = @utf8[0, 0]
150
- utf8_char.should eql(@char_array[0, 0].join)
143
+ assert_equal @char_array[-1, 2].join, @utf8[-1, 2]
144
+ assert_equal @char_array[-1, 100].join, @utf8[-1, 100]
145
+ assert_equal @char_array[0, 0].join, @utf8[0, 0]
151
146
  end
152
147
 
153
- it "should return nil for an out of range offset or length" do
154
- @utf8[100, 100].should be_nil
155
- @utf8[-100, 100].should be_nil
156
- @utf8[0, -100].should be_nil
148
+ test "should return nil for an out of range offset or length" do
149
+ assert @utf8[100, 100].nil?
150
+ assert @utf8[-100, 100].nil?
151
+ assert @utf8[0, -100].nil?
157
152
  end
158
153
  end
159
154
 
160
155
  context "[Range] syntax" do
161
- it "should be utf8-aware" do
162
- utf8_char = @utf8[1..4]
163
- utf8_char.should eql(@char_array[1..4].join)
164
-
165
- utf8_char = @utf8[0..6]
166
- utf8_char.should eql(@char_array[0..6].join)
156
+ test "should be utf8-aware" do
157
+ assert_equal @char_array[1..4].join, @utf8[1..4]
158
+ assert_equal @char_array[0..6].join, @utf8[0..6]
167
159
 
168
160
  # this will fail due to a bug in 1.9
169
161
  unless defined? Encoding
170
- utf8_char = @utf8[6..100]
171
- utf8_char.should eql(@char_array[6..100].join)
162
+ assert_equal @char_array[6..100].join, @utf8[6..100]
172
163
  end
173
164
 
174
- utf8_char = @utf8[-1..2]
175
- utf8_char.should eql(@char_array[-1..2].join)
176
-
177
- utf8_char = @utf8[-1..100]
178
- utf8_char.should eql(@char_array[-1..100].join)
165
+ assert_equal @char_array[-1..2].join, @utf8[-1..2]
166
+ assert_equal @char_array[-1..100].join, @utf8[-1..100]
179
167
  end
180
168
 
181
- it "should return nil for an out of range offset or length" do
182
- @utf8[100..100].should be_nil
183
- @utf8[-100..100].should be_nil
184
- @utf8[0..-100].should eql("")
169
+ test "should return nil for an out of range offset or length" do
170
+ assert @utf8[100..100].nil?
171
+ assert @utf8[-100..100].nil?
172
+ assert_equal "", @utf8[0..-100]
185
173
  end
186
174
  end
187
175
 
188
176
  context "#valid?" do
189
- it "should test validity" do
177
+ test "should test validity" do
190
178
  # lets cut right into the middle of a sequence so we know it's bad
191
179
  @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
192
180
  utf8 = @str[0..1]
193
181
  utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
194
182
  utf8 = utf8.as_utf8
195
183
 
196
- utf8.valid?.should be_false
197
- @utf8.valid?.should be_true
184
+ assert !utf8.valid?
185
+ assert @utf8.valid?
198
186
 
199
- "provided by Cristian Rodr\355guez.".as_utf8.should_not be_valid
187
+ assert !"provided by Cristian Rodr\355guez.".as_utf8.valid?
200
188
  end
201
189
 
202
- it "should test validity using a maximum codepoint" do
190
+ test "should test validity using a maximum codepoint" do
203
191
  highest_codepoint = @utf8.codepoints.to_a.max
204
192
 
205
- @utf8.valid?(highest_codepoint).should be_true
206
- @utf8.valid?(highest_codepoint-1).should be_false
193
+ assert @utf8.valid?(highest_codepoint)
194
+ assert !@utf8.valid?(highest_codepoint-1)
207
195
  end
208
196
  end
209
197
 
210
- it "[Regexp] syntax shouldn't be supported yet" do
211
- lambda {
198
+ test "[Regexp] syntax shouldn't be supported yet" do
199
+ assert_raise ArgumentError do
212
200
  @utf8[/a/]
213
- }.should raise_error(ArgumentError)
201
+ end
214
202
  end
215
203
 
216
- it "[Regexp, match_index] syntax shouldn't be supported yet" do
217
- lambda {
204
+ test "[Regexp, match_index] syntax shouldn't be supported yet" do
205
+ assert_raise ArgumentError do
218
206
  @utf8[/(a)/, 1]
219
- }.should raise_error(ArgumentError)
207
+ end
208
+ end
209
+
210
+ context "#ascii_only" do
211
+ test "should return true for a string within the low ascii range" do
212
+ assert "asdf".as_utf8.ascii_only?
213
+ end
214
+
215
+ test "should return false for a string outside the low ascii range" do
216
+ assert !@char_array.first.as_utf8.ascii_only?
217
+ end
220
218
  end
221
219
  end
data/utf8.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.files = `git ls-files`.split("\n")
14
14
  s.homepage = %q{http://github.com/brianmario/utf8}
15
15
  s.rdoc_options = ["--charset=UTF-8"]
16
- s.require_paths = ["lib", "ext"]
16
+ s.require_paths = ["lib"]
17
17
  s.rubygems_version = %q{1.4.2}
18
18
  s.summary = %q{A lightweight UTF8-aware String class meant for use with Ruby 1.8}
19
19
  s.test_files = `git ls-files spec`.split("\n")
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 6
10
- version: 0.1.6
9
+ - 7
10
+ version: 0.1.7
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Lopez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-07-25 00:00:00 -04:00
19
- default_executable:
18
+ date: 2011-11-22 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rake-compiler
@@ -75,6 +74,7 @@ extra_rdoc_files:
75
74
  files:
76
75
  - .gitignore
77
76
  - .rspec
77
+ - .travis.yml
78
78
  - Gemfile
79
79
  - MIT-LICENSE
80
80
  - README.rdoc
@@ -98,7 +98,6 @@ files:
98
98
  - spec/string_scanner_spec.rb
99
99
  - spec/string_spec.rb
100
100
  - utf8.gemspec
101
- has_rdoc: true
102
101
  homepage: http://github.com/brianmario/utf8
103
102
  licenses: []
104
103
 
@@ -107,7 +106,6 @@ rdoc_options:
107
106
  - --charset=UTF-8
108
107
  require_paths:
109
108
  - lib
110
- - ext
111
109
  required_ruby_version: !ruby/object:Gem::Requirement
112
110
  none: false
113
111
  requirements:
@@ -129,7 +127,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
129
127
  requirements: []
130
128
 
131
129
  rubyforge_project:
132
- rubygems_version: 1.6.2
130
+ rubygems_version: 1.8.11
133
131
  signing_key:
134
132
  specification_version: 3
135
133
  summary: A lightweight UTF8-aware String class meant for use with Ruby 1.8