RubyGems - utf8 - Versions diffs - 0.1.1 → 0.1.2 - Mend

utf8 0.1.1 → 0.1.2

Files changed (9)

data/.gitignore +2 -1
data/README.rdoc +2 -0
data/ext/utf8/string_scanner_utf8.c +30 -10
data/ext/utf8/string_utf8.c +5 -3
data/lib/utf8/string.rb +1 -1
data/spec/string_scanner_spec.rb +6 -2
data/spec/string_spec.rb +5 -2
data/utf8.gemspec +4 -4
metadata +7 -5

data/.gitignore CHANGED Viewed

@@ -1,4 +1,5 @@
 .DS_Store
 *.bundle
 *.o
-tmp/
+*.rbc
+tmp/

data/README.rdoc CHANGED Viewed

@@ -2,6 +2,8 @@
 The scanning code is implemented in C (would love a Java version as well, if any of you want to help with that).
+At the moment, this gem is tested on 1.8.7, 1.9.2 and Rubinius 1.2.1dev - it may work on others but ymmv.
 == String::UTF8 Example
 The String::UTF8#[] API isn't fully supported yet, and may never support regular expressions. I'll update this readme as I make progress.

data/ext/utf8/string_scanner_utf8.c CHANGED Viewed

@@ -1,9 +1,9 @@
 #include "ext.h"
-#include "ruby/regex.h"
 #include "utf8.h"
 extern ID intern_as_utf8;
+#ifndef RUBINIUS
 struct strscanner {
     /* multi-purpose flags */
     unsigned long flags;
@@ -15,13 +15,19 @@ struct strscanner {
     long prev; /* legal only when MATCHED_P(s) */
     long curr; /* always legal */
+    /*
+     * We never access this member, and would require a shitload of other patching
+     * to work right on other ruby versions
+     *
+     */
     /* the regexp register; legal only when MATCHED_P(s) */
-    struct re_registers regs;
+    /* struct re_registers regs; */
 };
 #define GET_SCANNER(obj, var)                                                          \
     Data_Get_Struct(obj, struct strscanner, var);                                      \
     if (NIL_P(var->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");
+#endif
 /*
  * Document-class: StringScanner::UTF8
@@ -34,22 +40,36 @@ struct strscanner {
  */
 static VALUE rb_cStringScanner_UTF8_getch(VALUE self) {
   unsigned char *str;
-  size_t len;
-  struct strscanner *scanner;
-  VALUE utf8Str;
+  long len = 0, pos = 0;
+  VALUE utf8Str, curStr;
   int8_t lastCharLen=0;
+#ifndef RUBINIUS
+  struct strscanner *scanner;
   GET_SCANNER(self, scanner);
-  str = (unsigned char *)RSTRING_PTR(scanner->str);
-  len = RSTRING_LEN(scanner->str);
+  curStr = scanner->str;
+  pos = scanner->curr;
+#else
+  curStr = rb_iv_get(self, "@string");
+  pos = FIX2LONG(rb_iv_get(self, "@pos"));
+#endif
+  str = (unsigned char *)RSTRING_PTR(curStr);
+  len = RSTRING_LEN(curStr);
-  if (len > 0 && len > scanner->curr) {
+  if (len > 0 && len > pos) {
     lastCharLen = utf8CharLen(str, len);
     if (lastCharLen < 0) {
       rb_raise(rb_eArgError, "invalid utf-8 byte sequence");
     }
-    utf8Str = rb_str_new((char *)str+scanner->curr, lastCharLen);
-    scanner->curr += lastCharLen;
+    utf8Str = rb_str_new((char *)str+pos, lastCharLen);
+    pos += lastCharLen;
+#ifndef RUBINIUS
+    scanner->curr = pos;
+#else
+    rb_iv_set(self, "@pos", LONG2FIX(pos));
+#endif
     AS_UTF8(utf8Str);
     return utf8Str;
   } else {

data/ext/utf8/string_utf8.c CHANGED Viewed

@@ -30,7 +30,7 @@ static VALUE rb_cString_UTF8_length(VALUE self) {
  *
  * Iterates over the string, yielding one UTF8 character at a time
  */
-static VALUE rb_cString_UTF8_each_char(VALUE self) {
+static VALUE rb_cString_UTF8_each_char(int argc, VALUE *argv, VALUE self) {
   unsigned char *str = (unsigned char *)RSTRING_PTR(self);
   size_t len = RSTRING_LEN(self), i=0;
   int8_t lastCharLen=0;
@@ -38,7 +38,9 @@ static VALUE rb_cString_UTF8_each_char(VALUE self) {
   // this will return an Enumerator wrapping this string, yielding this method
   // when Enumerator#each is called
-  RETURN_ENUMERATOR(self, 0, 0);
+  if (!rb_block_given_p()) {
+    return rb_funcall(self, rb_intern("to_enum"), 1, ID2SYM(rb_intern("each_char")));
+  }
   for(; i<len; i+=lastCharLen) {
     lastCharLen = utf8CharLen(str, len);
@@ -259,6 +261,6 @@ void init_String_UTF8() {
   VALUE rb_cString_UTF8 = rb_define_class_under(rb_cString, "UTF8", rb_cString);
   rb_define_method(rb_cString_UTF8, "length",    rb_cString_UTF8_length, 0);
-  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, 0);
+  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, -1);
   rb_define_method(rb_cString_UTF8, "[]",        rb_cString_UTF8_slice, -1);
 }

data/lib/utf8/string.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class String
   end
   class UTF8
-    VERSION = "0.1.1"
+    VERSION = "0.1.2"
     # Gives you access to the raw non-UTF8-aware version of the string
     def as_raw

data/spec/string_scanner_spec.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 require File.expand_path('../spec_helper', __FILE__)
 describe StringScanner::UTF8 do
-  before(:all) do
+  before(:each) do
     @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
     @scanner = StringScanner.new(@char_array.join)
     @utf8_scanner = @scanner.as_utf8
@@ -10,7 +10,11 @@ describe StringScanner::UTF8 do
   it "should blow up on invalid utf8 chars" do
     # lets cut right into the middle of a sequence so we know it's bad
-    scanner = StringScanner.new(@char_array.join[0..1]).as_utf8
+    str = @char_array.join
+    str.force_encoding('binary') if str.respond_to?(:force_encoding)
+    str = str[0..1]
+    str.force_encoding('utf-8') if str.respond_to?(:force_encoding)
+    scanner = StringScanner.new(str).as_utf8
     lambda {
       scanner.getch

data/spec/string_spec.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 require File.expand_path('../spec_helper', __FILE__)
 describe String::UTF8 do
-  before(:all) do
+  before(:each) do
     @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
     @str = @char_array.join
     @utf8 = @str.as_utf8
@@ -11,7 +11,10 @@ describe String::UTF8 do
   it "should blow up on invalid utf8 chars" do
     # lets cut right into the middle of a sequence so we know it's bad
-    utf8 = @str[0..1].as_utf8
+    @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
+    utf8 = @str[0..1]
+    utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
+    utf8 = utf8.as_utf8
     lambda {
       utf8.length

data/utf8.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = %q{utf8}
-  s.version = "0.1.1"
+  s.version = "0.1.2"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Brian Lopez"]
@@ -24,14 +24,14 @@ Gem::Specification.new do |s|
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
       s.add_development_dependency(%q<rake-compiler>, [">= 0.7.5"])
-      s.add_development_dependency(%q<rspec>, [">= 0"])
+      s.add_development_dependency(%q<rspec>, [">= 2.0.0"])
     else
       s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
-      s.add_dependency(%q<rspec>, [">= 0"])
+      s.add_dependency(%q<rspec>, [">= 2.0.0"])
     end
   else
     s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
-    s.add_dependency(%q<rspec>, [">= 0"])
+    s.add_dependency(%q<rspec>, [">= 2.0.0"])
   end
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: utf8
 version: !ruby/object:Gem::Version
-  hash: 25
+  hash: 31
   prerelease:
   segments:
   - 0
   - 1
-  - 1
-  version: 0.1.1
+  - 2
+  version: 0.1.2
 platform: ruby
 authors:
 - Brian Lopez
@@ -42,10 +42,12 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
+        hash: 15
         segments:
+        - 2
+        - 0
         - 0
-        version: "0"
+        version: 2.0.0
   type: :development
   version_requirements: *id002
 description: