utf8 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,4 +1,5 @@
1
1
  .DS_Store
2
2
  *.bundle
3
3
  *.o
4
- tmp/
4
+ *.rbc
5
+ tmp/
data/README.rdoc CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  The scanning code is implemented in C (would love a Java version as well, if any of you want to help with that).
4
4
 
5
+ At the moment, this gem is tested on Ruby 1.8.7, Ruby 1.9.2 and Rubinius 1.2.1dev; it may work on other versions, but your mileage may vary.
6
+
5
7
  == String::UTF8 Example
6
8
 
7
9
  The String::UTF8#[] API isn't fully supported yet, and may never support regular expressions. I'll update this readme as I make progress.
@@ -1,9 +1,9 @@
1
1
  #include "ext.h"
2
- #include "ruby/regex.h"
3
2
  #include "utf8.h"
4
3
 
5
4
  extern ID intern_as_utf8;
6
5
 
6
+ #ifndef RUBINIUS
7
7
  struct strscanner {
8
8
  /* multi-purpose flags */
9
9
  unsigned long flags;
@@ -15,13 +15,19 @@ struct strscanner {
15
15
  long prev; /* legal only when MATCHED_P(s) */
16
16
  long curr; /* always legal */
17
17
 
18
+ /*
19
+ * We never access this member, and keeping it would require a lot of other patching
20
+ * to work right on other Ruby versions
21
+ *
22
+ */
18
23
  /* the regexp register; legal only when MATCHED_P(s) */
19
- struct re_registers regs;
24
+ /* struct re_registers regs; */
20
25
  };
21
26
 
22
27
  #define GET_SCANNER(obj, var) \
23
28
  Data_Get_Struct(obj, struct strscanner, var); \
24
29
  if (NIL_P(var->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");
30
+ #endif
25
31
 
26
32
  /*
27
33
  * Document-class: StringScanner::UTF8
@@ -34,22 +40,36 @@ struct strscanner {
34
40
  */
35
41
  static VALUE rb_cStringScanner_UTF8_getch(VALUE self) {
36
42
  unsigned char *str;
37
- size_t len;
38
- struct strscanner *scanner;
39
- VALUE utf8Str;
43
+ long len = 0, pos = 0;
44
+ VALUE utf8Str, curStr;
40
45
  int8_t lastCharLen=0;
46
+
47
+ #ifndef RUBINIUS
48
+ struct strscanner *scanner;
41
49
  GET_SCANNER(self, scanner);
42
50
 
43
- str = (unsigned char *)RSTRING_PTR(scanner->str);
44
- len = RSTRING_LEN(scanner->str);
51
+ curStr = scanner->str;
52
+ pos = scanner->curr;
53
+ #else
54
+ curStr = rb_iv_get(self, "@string");
55
+ pos = FIX2LONG(rb_iv_get(self, "@pos"));
56
+ #endif
57
+
58
+ str = (unsigned char *)RSTRING_PTR(curStr);
59
+ len = RSTRING_LEN(curStr);
45
60
 
46
- if (len > 0 && len > scanner->curr) {
61
+ if (len > 0 && len > pos) {
47
62
  lastCharLen = utf8CharLen(str, len);
48
63
  if (lastCharLen < 0) {
49
64
  rb_raise(rb_eArgError, "invalid utf-8 byte sequence");
50
65
  }
51
- utf8Str = rb_str_new((char *)str+scanner->curr, lastCharLen);
52
- scanner->curr += lastCharLen;
66
+ utf8Str = rb_str_new((char *)str+pos, lastCharLen);
67
+ pos += lastCharLen;
68
+ #ifndef RUBINIUS
69
+ scanner->curr = pos;
70
+ #else
71
+ rb_iv_set(self, "@pos", LONG2FIX(pos));
72
+ #endif
53
73
  AS_UTF8(utf8Str);
54
74
  return utf8Str;
55
75
  } else {
@@ -30,7 +30,7 @@ static VALUE rb_cString_UTF8_length(VALUE self) {
30
30
  *
31
31
  * Iterates over the string, yielding one UTF8 character at a time
32
32
  */
33
- static VALUE rb_cString_UTF8_each_char(VALUE self) {
33
+ static VALUE rb_cString_UTF8_each_char(int argc, VALUE *argv, VALUE self) {
34
34
  unsigned char *str = (unsigned char *)RSTRING_PTR(self);
35
35
  size_t len = RSTRING_LEN(self), i=0;
36
36
  int8_t lastCharLen=0;
@@ -38,7 +38,9 @@ static VALUE rb_cString_UTF8_each_char(VALUE self) {
38
38
 
39
39
  // when no block is given, return an Enumerator wrapping this string that
40
40
  // calls back into this method when Enumerator#each is called
41
- RETURN_ENUMERATOR(self, 0, 0);
41
+ if (!rb_block_given_p()) {
42
+ return rb_funcall(self, rb_intern("to_enum"), 1, ID2SYM(rb_intern("each_char")));
43
+ }
42
44
 
43
45
  for(; i<len; i+=lastCharLen) {
44
46
  lastCharLen = utf8CharLen(str, len);
@@ -259,6 +261,6 @@ void init_String_UTF8() {
259
261
  VALUE rb_cString_UTF8 = rb_define_class_under(rb_cString, "UTF8", rb_cString);
260
262
 
261
263
  rb_define_method(rb_cString_UTF8, "length", rb_cString_UTF8_length, 0);
262
- rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, 0);
264
+ rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, -1);
263
265
  rb_define_method(rb_cString_UTF8, "[]", rb_cString_UTF8_slice, -1);
264
266
  }
data/lib/utf8/string.rb CHANGED
@@ -5,7 +5,7 @@ class String
5
5
  end
6
6
 
7
7
  class UTF8
8
- VERSION = "0.1.1"
8
+ VERSION = "0.1.2"
9
9
 
10
10
  # Gives you access to the raw non-UTF8-aware version of the string
11
11
  def as_raw
@@ -2,7 +2,7 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe StringScanner::UTF8 do
5
- before(:all) do
5
+ before(:each) do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @scanner = StringScanner.new(@char_array.join)
8
8
  @utf8_scanner = @scanner.as_utf8
@@ -10,7 +10,11 @@ describe StringScanner::UTF8 do
10
10
 
11
11
  it "should blow up on invalid utf8 chars" do
12
12
  # let's cut right into the middle of a sequence so we know it's bad
13
- scanner = StringScanner.new(@char_array.join[0..1]).as_utf8
13
+ str = @char_array.join
14
+ str.force_encoding('binary') if str.respond_to?(:force_encoding)
15
+ str = str[0..1]
16
+ str.force_encoding('utf-8') if str.respond_to?(:force_encoding)
17
+ scanner = StringScanner.new(str).as_utf8
14
18
 
15
19
  lambda {
16
20
  scanner.getch
data/spec/string_spec.rb CHANGED
@@ -2,7 +2,7 @@
2
2
  require File.expand_path('../spec_helper', __FILE__)
3
3
 
4
4
  describe String::UTF8 do
5
- before(:all) do
5
+ before(:each) do
6
6
  @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
7
  @str = @char_array.join
8
8
  @utf8 = @str.as_utf8
@@ -11,7 +11,10 @@ describe String::UTF8 do
11
11
 
12
12
  it "should blow up on invalid utf8 chars" do
13
13
  # let's cut right into the middle of a sequence so we know it's bad
14
- utf8 = @str[0..1].as_utf8
14
+ @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
15
+ utf8 = @str[0..1]
16
+ utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
17
+ utf8 = utf8.as_utf8
15
18
 
16
19
  lambda {
17
20
  utf8.length
data/utf8.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{utf8}
5
- s.version = "0.1.1"
5
+ s.version = "0.1.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Brian Lopez"]
@@ -24,14 +24,14 @@ Gem::Specification.new do |s|
24
24
 
25
25
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
26
26
  s.add_development_dependency(%q<rake-compiler>, [">= 0.7.5"])
27
- s.add_development_dependency(%q<rspec>, [">= 0"])
27
+ s.add_development_dependency(%q<rspec>, [">= 2.0.0"])
28
28
  else
29
29
  s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
30
- s.add_dependency(%q<rspec>, [">= 0"])
30
+ s.add_dependency(%q<rspec>, [">= 2.0.0"])
31
31
  end
32
32
  else
33
33
  s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
34
- s.add_dependency(%q<rspec>, [">= 0"])
34
+ s.add_dependency(%q<rspec>, [">= 2.0.0"])
35
35
  end
36
36
  end
37
37
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: utf8
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 1
10
- version: 0.1.1
9
+ - 2
10
+ version: 0.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brian Lopez
@@ -42,10 +42,12 @@ dependencies:
42
42
  requirements:
43
43
  - - ">="
44
44
  - !ruby/object:Gem::Version
45
- hash: 3
45
+ hash: 15
46
46
  segments:
47
+ - 2
48
+ - 0
47
49
  - 0
48
- version: "0"
50
+ version: 2.0.0
49
51
  type: :development
50
52
  version_requirements: *id002
51
53
  description: