RubyGems - utf8 - Versions diffs - 0.1.7 → 0.1.8 - Mend

utf8 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/README.md ADDED

@@ -0,0 +1,52 @@
+# A lightweight UTF8-aware String class meant for use with Ruby 1.8.x
+The scanning code is implemented in C (would love a Java version as well, if any of you want to help with that).
+At the moment, this gem is tested on 1.8.7, 1.9.2, 1.9.3 and Rubinius - it may work on others but ymmv.
+## String::UTF8 Example
+The String::UTF8#[] API isn't fully supported yet, and may never support regular expressions. I'll update this readme as I make progress.
+``` ruby
+# String (on 1.8)
+str = "你好世界"
+str.length # 12
+str[0] # 228
+str.chars.to_a.inspect # ["\344", "\275", "\240", "\345", "\245", "\275", "\344", "\270", "\226", "\347", "\225", "\214"]
+# String::UTF8 (on 1.8)
+utf8_str = "你好世界".as_utf8
+utf8_str.length # 4
+utf8_str.chars.to_a.inspect # => ["你", "好", "世", "界"]
+utf8_str[0]    # => "你"
+utf8_str[0, 3] # => "你好世"
+utf8_str[0..3] # => "你好世界"
+```
+## StringScanner::UTF8 Example
+The full StringScanner API hasn't been made UTF8-aware yet. I may try and finish it later, but the getch method is all I need at the moment.
+``` ruby
+# StringScanner (on 1.8)
+scanner = StringScanner.new("你好世界")
+scanner.getch # => "\344"
+# StringScanner::UTF8
+require 'utf8/string_scanner'
+# NOTE: we could have used scanner.as_utf8 instead
+utf8_scanner = StringScanner::UTF8.new("你好世界")
+utf8_scanner.getch # => "你"
+utf8_scanner.getch # => "好"
+utf8_scanner.getch # => "世"
+utf8_scanner.reset
+utf8_scanner.getch # => "你"
+```
+## Disclaimer
+I'm only planning on making the methods I need UTF8-aware, if you need others I welcome pull requests :)

data/ext/utf8/string_utf8.c CHANGED

@@ -343,15 +343,23 @@ static VALUE rb_cString_UTF8_clean(VALUE self) {
   int8_t curCharLen;
   VALUE rb_out;
+  outBuf = NULL;
+  outBufCur = NULL;
   inBuf = (unsigned char *)RSTRING_PTR(self);
   inBufCur = inBuf;
   len = RSTRING_LEN(self);
-  outBuf = malloc(len);
-  outBufCur = outBuf;
   for(i=0; i<len; i+=curCharLen) {
     curCharLen = utf8CharLen(inBufCur, len);
     if (curCharLen < 0) {
+      if (!outBuf) {
+        outBuf = xmalloc(len);
+        outBufCur = outBuf;
+        if (inBufCur-inBuf > 0) {
+          memcpy(outBufCur, inBuf, inBufCur-inBuf);
+        }
+      }
       if (inBufCur-inBuf > 0) {
         memcpy(outBufCur, inBuf, inBufCur-inBuf);
         outBufCur += inBufCur-inBuf;
@@ -364,14 +372,18 @@ static VALUE rb_cString_UTF8_clean(VALUE self) {
     inBufCur += curCharLen;
   }
-  if (inBufCur-inBuf > 0) {
-    memcpy(outBufCur, inBuf, inBufCur-inBuf);
-  }
+  if (outBuf) {
+    if (inBufCur-inBuf > 0) {
+      memcpy(outBufCur, inBuf, inBufCur-inBuf);
+    }
-  rb_out = rb_str_new((const char*)outBuf, len);
-  AS_UTF8(rb_out);
+    rb_out = rb_str_new((const char*)outBuf, len);
+    xfree(outBuf);
-  free(outBuf);
+    AS_UTF8(rb_out);
+  } else {
+    rb_out = self;
+  }
   return rb_out;
 }

data/lib/utf8/version.rb CHANGED

@@ -1,5 +1,5 @@
 class String
   class UTF8 < ::String
-    VERSION = "0.1.7"
+    VERSION = "0.1.8"
   end
 end

data/spec/string_spec.rb CHANGED

@@ -64,6 +64,11 @@ describe String::UTF8 do
     assert_equal "asdf24\342\206\222asdf24", "asdf24\342\206\222asdf24".as_utf8.clean
   end
+  test "clean should return the passed value if it was valid UTF-8" do
+    orig = "asdfasdf".as_utf8
+    assert_equal orig.object_id, orig.clean.object_id
+  end
   context "#length and #size" do
     test "should be utf8-aware" do
       assert_equal @utf8_len, @utf8.length

data/utf8.gemspec CHANGED

@@ -7,9 +7,6 @@ Gem::Specification.new do |s|
   s.date = Time.now.utc.strftime("%Y-%m-%d")
   s.email = %q{seniorlopez@gmail.com}
   s.extensions = ["ext/utf8/extconf.rb"]
-  s.extra_rdoc_files = [
-    "README.rdoc"
-  ]
   s.files = `git ls-files`.split("\n")
   s.homepage = %q{http://github.com/brianmario/utf8}
   s.rdoc_options = ["--charset=UTF-8"]

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: utf8
 version: !ruby/object:Gem::Version
-  hash: 21
+  hash: 11
   prerelease:
   segments:
   - 0
   - 1
-  - 7
-  version: 0.1.7
+  - 8
+  version: 0.1.8
 platform: ruby
 authors:
 - Brian Lopez
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-22 00:00:00 Z
+date: 2011-12-17 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake-compiler
@@ -69,15 +69,15 @@ executables: []
 extensions:
 - ext/utf8/extconf.rb
-extra_rdoc_files:
-- README.rdoc
+extra_rdoc_files: []
 files:
 - .gitignore
 - .rspec
 - .travis.yml
 - Gemfile
 - MIT-LICENSE
-- README.rdoc
+- README.md
 - Rakefile
 - benchmark/active_support.rb
 - benchmark/test.txt

data/README.rdoc DELETED

@@ -1,48 +0,0 @@
-= A lightweight UTF8-aware String class meant for use with Ruby 1.8.x
-The scanning code is implemented in C (would love a Java version as well, if any of you want to help with that).
-At the moment, this gem is tested on 1.8.7, 1.9.2 and Rubinius 1.2.1dev - it may work on others but ymmv.
-== String::UTF8 Example
-The String::UTF8#[] API isn't fully supported yet, and may never support regular expressions. I'll update this readme as I make progress.
-  # String (on 1.8)
-  str = "你好世界"
-  str.length # 12
-  str[0] # 228
-  str.chars.to_a.inspect # ["\344", "\275", "\240", "\345", "\245", "\275", "\344", "\270", "\226", "\347", "\225", "\214"]
-  # String::UTF8 (on 1.8)
-  utf8_str = "你好世界".as_utf8
-  utf8_str.length # 4
-  utf8.chars.to_a.inspect # => ["你", "好", "世", "界"]
-  utf8[0]        # => "你"
-  utf8_str[0, 3] # => "你好世"
-  utf8_str[0..3] # => "你好世界"
-== StringScanner::UTF8 Example
-The full StringScanner API hasn't been made UTF8-aware yet. I may try and finish it later, but the getch method is all I need at the moment.
-  # StringScanner (on 1.8)
-  scanner = StringScanner.new("你好世界")
-  scanner.getch # => "\344"
-  # StringScanner::UTF8
-  require 'utf8/string_scanner'
-  # NOTE: we could have used scanner.as_utf8 instead
-  utf8_scanner = StringScanner::UTF8.new("你好世界")
-  utf8_scanner.getch # => "你"
-  utf8_scanner.getch # => "好"
-  utf8_scanner.getch # => "世"
-  utf8_scanner.reset
-  utf8_scanner.getch # => "你"
-== Disclaimer
-I'm only planning on making the methods I need UTF8-aware, if you need others I welcome pull requests :)