RubyGems - utf8 - Versions diffs - 0.1.1 → 0.1.2 - Mend

utf8 0.1.1 → 0.1.2

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (9)

data/.gitignore +2 -1
data/README.rdoc +2 -0
data/ext/utf8/string_scanner_utf8.c +30 -10
data/ext/utf8/string_utf8.c +5 -3
data/lib/utf8/string.rb +1 -1
data/spec/string_scanner_spec.rb +6 -2
data/spec/string_spec.rb +5 -2
data/utf8.gemspec +4 -4
metadata +7 -5

data/.gitignore CHANGED Viewed

@@ -1,4 +1,5 @@
 .DS_Store
 *.bundle
 *.o
-tmp/
+*.rbc
+tmp/

data/README.rdoc CHANGED Viewed

@@ -2,6 +2,8 @@
 The scanning code is implemented in C (would love a Java version as well, if any of you want to help with that).
+At the moment, this gem is tested on 1.8.7, 1.9.2 and Rubinius 1.2.1dev - it may work on others but ymmv.
 == String::UTF8 Example
 The String::UTF8#[] API isn't fully supported yet, and may never support regular expressions. I'll update this readme as I make progress.

data/ext/utf8/string_scanner_utf8.c CHANGED Viewed

@@ -1,9 +1,9 @@
 #include "ext.h"
-#include "ruby/regex.h"
 #include "utf8.h"
 extern ID intern_as_utf8;
+#ifndef RUBINIUS
 struct strscanner {
     /* multi-purpose flags */
     unsigned long flags;
@@ -15,13 +15,19 @@ struct strscanner {
     long prev; /* legal only when MATCHED_P(s) */
     long curr; /* always legal */
+    /*
+     * We never access this member, and would require a shitload of other patching
+     * to work right on other ruby versions
+     *
+     */
     /* the regexp register; legal only when MATCHED_P(s) */
-    struct re_registers regs;
+    /* struct re_registers regs; */
 };
 #define GET_SCANNER(obj, var)                                                          \
     Data_Get_Struct(obj, struct strscanner, var);                                      \
     if (NIL_P(var->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");
+#endif
 /*
  * Document-class: StringScanner::UTF8
@@ -34,22 +40,36 @@ struct strscanner {
  */
 static VALUE rb_cStringScanner_UTF8_getch(VALUE self) {
   unsigned char *str;
-  size_t len;
-  struct strscanner *scanner;
-  VALUE utf8Str;
+  long len = 0, pos = 0;
+  VALUE utf8Str, curStr;
   int8_t lastCharLen=0;
+#ifndef RUBINIUS
+  struct strscanner *scanner;
   GET_SCANNER(self, scanner);
-  str = (unsigned char *)RSTRING_PTR(scanner->str);
-  len = RSTRING_LEN(scanner->str);
+  curStr = scanner->str;
+  pos = scanner->curr;
+#else
+  curStr = rb_iv_get(self, "@string");
+  pos = FIX2LONG(rb_iv_get(self, "@pos"));
+#endif
+  str = (unsigned char *)RSTRING_PTR(curStr);
+  len = RSTRING_LEN(curStr);
-  if (len > 0 && len > scanner->curr) {
+  if (len > 0 && len > pos) {
     lastCharLen = utf8CharLen(str, len);
     if (lastCharLen < 0) {
       rb_raise(rb_eArgError, "invalid utf-8 byte sequence");
     }
-    utf8Str = rb_str_new((char *)str+scanner->curr, lastCharLen);
-    scanner->curr += lastCharLen;
+    utf8Str = rb_str_new((char *)str+pos, lastCharLen);
+    pos += lastCharLen;
+#ifndef RUBINIUS
+    scanner->curr = pos;
+#else
+    rb_iv_set(self, "@pos", LONG2FIX(pos));
+#endif
     AS_UTF8(utf8Str);
     return utf8Str;
   } else {

data/ext/utf8/string_utf8.c CHANGED Viewed

@@ -30,7 +30,7 @@ static VALUE rb_cString_UTF8_length(VALUE self) {
  *
  * Iterates over the string, yielding one UTF8 character at a time
  */
-static VALUE rb_cString_UTF8_each_char(VALUE self) {
+static VALUE rb_cString_UTF8_each_char(int argc, VALUE *argv, VALUE self) {
   unsigned char *str = (unsigned char *)RSTRING_PTR(self);
   size_t len = RSTRING_LEN(self), i=0;
   int8_t lastCharLen=0;
@@ -38,7 +38,9 @@ static VALUE rb_cString_UTF8_each_char(VALUE self) {
   // this will return an Enumerator wrapping this string, yielding this method
   // when Enumerator#each is called
-  RETURN_ENUMERATOR(self, 0, 0);
+  if (!rb_block_given_p()) {
+    return rb_funcall(self, rb_intern("to_enum"), 1, ID2SYM(rb_intern("each_char")));
+  }
   for(; i<len; i+=lastCharLen) {
     lastCharLen = utf8CharLen(str, len);
@@ -259,6 +261,6 @@ void init_String_UTF8() {
   VALUE rb_cString_UTF8 = rb_define_class_under(rb_cString, "UTF8", rb_cString);
   rb_define_method(rb_cString_UTF8, "length",    rb_cString_UTF8_length, 0);
-  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, 0);
+  rb_define_method(rb_cString_UTF8, "each_char", rb_cString_UTF8_each_char, -1);
   rb_define_method(rb_cString_UTF8, "[]",        rb_cString_UTF8_slice, -1);
 }

data/lib/utf8/string.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class String
   end
   class UTF8
-    VERSION = "0.1.1"
+    VERSION = "0.1.2"
     # Gives you access to the raw non-UTF8-aware version of the string
     def as_raw

data/spec/string_scanner_spec.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 require File.expand_path('../spec_helper', __FILE__)
 describe StringScanner::UTF8 do
-  before(:all) do
+  before(:each) do
     @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
     @scanner = StringScanner.new(@char_array.join)
     @utf8_scanner = @scanner.as_utf8
@@ -10,7 +10,11 @@ describe StringScanner::UTF8 do
   it "should blow up on invalid utf8 chars" do
     # lets cut right into the middle of a sequence so we know it's bad
-    scanner = StringScanner.new(@char_array.join[0..1]).as_utf8
+    str = @char_array.join
+    str.force_encoding('binary') if str.respond_to?(:force_encoding)
+    str = str[0..1]
+    str.force_encoding('utf-8') if str.respond_to?(:force_encoding)
+    scanner = StringScanner.new(str).as_utf8
     lambda {
       scanner.getch

data/spec/string_spec.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 require File.expand_path('../spec_helper', __FILE__)
 describe String::UTF8 do
-  before(:all) do
+  before(:each) do
     @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
     @str = @char_array.join
     @utf8 = @str.as_utf8
@@ -11,7 +11,10 @@ describe String::UTF8 do
   it "should blow up on invalid utf8 chars" do
     # lets cut right into the middle of a sequence so we know it's bad
-    utf8 = @str[0..1].as_utf8
+    @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
+    utf8 = @str[0..1]
+    utf8.force_encoding('utf-8') if utf8.respond_to?(:force_encoding)
+    utf8 = utf8.as_utf8
     lambda {
       utf8.length

data/utf8.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name = %q{utf8}
-  s.version = "0.1.1"
+  s.version = "0.1.2"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Brian Lopez"]
@@ -24,14 +24,14 @@ Gem::Specification.new do |s|
     if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
       s.add_development_dependency(%q<rake-compiler>, [">= 0.7.5"])
-      s.add_development_dependency(%q<rspec>, [">= 0"])
+      s.add_development_dependency(%q<rspec>, [">= 2.0.0"])
     else
       s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
-      s.add_dependency(%q<rspec>, [">= 0"])
+      s.add_dependency(%q<rspec>, [">= 2.0.0"])
     end
   else
     s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
-    s.add_dependency(%q<rspec>, [">= 0"])
+    s.add_dependency(%q<rspec>, [">= 2.0.0"])
   end
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: utf8
 version: !ruby/object:Gem::Version
-  hash: 25
+  hash: 31
   prerelease:
   segments:
   - 0
   - 1
-  - 1
-  version: 0.1.1
+  - 2
+  version: 0.1.2
 platform: ruby
 authors:
 - Brian Lopez
@@ -42,10 +42,12 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 3
+        hash: 15
         segments:
+        - 2
+        - 0
         - 0
-        version: "0"
+        version: 2.0.0
   type: :development
   version_requirements: *id002
 description: