RubyGems - utf8_proc - Versions diffs - 0.1.0 → 0.2.0 - Mend

utf8_proc 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +24 -6
data/ext/utf8_proc/utf8_proc.c +32 -22
data/lib/utf8_proc/version.rb +1 -1
data/utf8_proc.gemspec +1 -1
metadata +2 -3
data/ext/utf8_proc/.clang_complete +0 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8da1ac5c36921b8730f3a40bdc4311194f18ae54
-  data.tar.gz: 49011944ab5fbfc868876d33a7da5e043678f286
+  metadata.gz: a10b2518012c6465c1365bf5d1f5b74ffe31c1f8
+  data.tar.gz: 8d62ad653f961acbe83a0886b1aec36e0c545343
 SHA512:
-  metadata.gz: 7ddec0ae393c46c5b5e3464d619a6e2438b1fee300d4f7b15c43cacff7ebfce98ca5641236a5d9a67f2a4631af650a0bf7ab1abf9d8e02b0f9dd529cb1661353
-  data.tar.gz: 2ad29f62132cdcfef14f527b105d30dd6de67f3816f5b34faff6e1cffa15b728f31fdfd490fc9bf3c66c3b1c9fb870b57cfefedbe19afd8c0ccc8187286487e7
+  metadata.gz: 6802fae4cf0b8a3d515541f540f2cc377b6d3c10ac10ea48fa0b7b045169083143f44d77b6bb7b5aebd2d507365e438ca07316a447f8f2563c328c21d6bc2269
+  data.tar.gz: 8b7b35123552afd904a2ec05fc92301b9f03876f9721b53e7e95e3bd644dd37241fd98b94808750280b142eb02b7e897b3a00c9357354847ee70657c617d3522

data/README.md CHANGED Viewed

@@ -1,8 +1,12 @@
 # UTF8Proc
-Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/utf8_proc`. To experiment with that code, run `bin/console` for an interactive prompt.
+A simple wrapper around [utf8proc](https://github.com/JuliaLang/utf8proc) for normalizing Unicode strings. Requires the `utf8proc` library and headers to be installed on your system. *(Packages are available. OSX: `brew install utf8proc`, Linux: `libutf8proc-dev` or `utf8proc-devel`)*
-TODO: Delete this and the text above, and describe your gem
+Currently supports UTF-8/ASCII string input and NFC, NFD, NFKC, NFKD, and NFKC-Casefold forms. Handles Unicode 9.0 and includes the current official full suite of 9.0 normalization tests.
+Quick benchmarks against the [UNF](https://github.com/knu/ruby-unf) gem show it to range from the same speed (best case) to ~2x slower (worst case), averaging ~1.2x slower on complex Unicode strings. The speed difference is smaller in NFC/NFD modes when the input strings are mostly or already normalized.
+*(Note: UNF is generally a bit faster but currently officially supports Unicode 6.0 and does not pass all 9.0 normalization tests.)*
 ## Installation
@@ -22,13 +26,27 @@ Or install it yourself as:
 ## Usage
-TODO: Write usage instructions here
+```ruby
+require "utf8_proc"
+# Canonical Decomposition, followed by Canonical Composition
+UTF8Proc.NFC(utf8_string)
-## Development
+# Canonical Decomposition
+UTF8Proc.NFD(utf8_string)
-After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+# Compatibility Decomposition, followed by Canonical Composition
+UTF8Proc.NFKC(utf8_string)
-To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+# Compatibility Decomposition
+UTF8Proc.NFKD(utf8_string)
+# Compatibility Decomposition, followed by Canonical Composition with Case-folding
+UTF8Proc.NFKC_CF(utf8_string)
+# Second argument may be any of: [:nfc (default), :nfd, :nfkc, :nfkd, :nfkc_cf]
+UTF8Proc.normalize(utf8_string, form = :nfc)
+```
 ## Contributing

data/ext/utf8_proc/utf8_proc.c CHANGED Viewed

@@ -7,6 +7,7 @@ ID NFC;
 ID NFD;
 ID NFKC;
 ID NFKD;
+ID NFKC_CF;
 static inline void checkStrEncoding(VALUE *string) {
   rb_encoding *enc = rb_enc_get(*string);
@@ -15,7 +16,7 @@ static inline void checkStrEncoding(VALUE *string) {
   }
 }
-static inline VALUE CnormInternal(VALUE string, utf8proc_option_t options) {
+static inline VALUE normInternal(VALUE string, utf8proc_option_t options) {
   checkStrEncoding(&string);
   utf8proc_uint8_t *retval;
   utf8proc_ssize_t retlen = utf8proc_map(
@@ -28,43 +29,50 @@ static inline VALUE CnormInternal(VALUE string, utf8proc_option_t options) {
 }
-VALUE CtoNFC(VALUE self, VALUE string) {
-  return CnormInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+VALUE toNFC(VALUE self, VALUE string) {
+  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
 }
-VALUE CtoNFD(VALUE self, VALUE string) {
-  return CnormInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
+VALUE toNFD(VALUE self, VALUE string) {
+  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
 }
-VALUE CtoNFKC(VALUE self, VALUE string) {
-  return CnormInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
+VALUE toNFKC(VALUE self, VALUE string) {
+  return normInternal(string,UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
 }
-VALUE CtoNFKD(VALUE self, VALUE string) {
-  return CnormInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
+VALUE toNFKD(VALUE self, VALUE string) {
+  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
 }
-VALUE Cnorm(int argc, VALUE* argv, VALUE self){
+VALUE toNFKC_CF(VALUE self, VALUE string) {
+  return normInternal(string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
+}
+VALUE norm(int argc, VALUE* argv, VALUE self){
   VALUE string;
   VALUE form;
   rb_scan_args(argc, argv, "11", &string, &form);
   if (NIL_P(form)) {
-    return CtoNFC(self, string);
+    return toNFC(self, string);
   }
   ID s_form = SYM2ID(form);
   if (s_form == NFC) {
-    return CtoNFC(self, string);
+    return toNFC(self, string);
   }else if(s_form == NFD) {
-    return CtoNFD(self, string);
+    return toNFD(self, string);
   }else if(s_form == NFKC) {
-    return CtoNFKC(self, string);
+    return toNFKC(self, string);
   }else if(s_form == NFKD) {
-    return CtoNFKD(self, string);
+    return toNFKD(self, string);
+  }else if(s_form == NFKC_CF) {
+    return toNFKC_CF(self, string);
   }else{
     rb_raise(rb_eRuntimeError, "%s",
-             "Second optional argument must be one of [:nfc, :nfd, :nfkc, :nfkd] (defaults to :nfc)");
+             "Second optional argument must be one of [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] (defaults to :nfc)");
   }
 }
@@ -78,10 +86,12 @@ void Init_utf8_proc(void) {
   NFD = rb_intern("nfd");
   NFKC = rb_intern("nfkc");
   NFKD = rb_intern("nfkd");
-  rb_define_singleton_method(rb_mBase, "to_NFC", CtoNFC, 1);
-  rb_define_singleton_method(rb_mBase, "to_NFD", CtoNFD, 1);
-  rb_define_singleton_method(rb_mBase, "to_NFKC", CtoNFKC, 1);
-  rb_define_singleton_method(rb_mBase, "to_NFKD", CtoNFKD, 1);
-  rb_define_singleton_method(rb_mBase, "normalize", Cnorm, -1);
+  NFKC_CF = rb_intern("nfkc_cf");
+  rb_define_singleton_method(rb_mBase, "NFC", toNFC, 1);
+  rb_define_singleton_method(rb_mBase, "NFD", toNFD, 1);
+  rb_define_singleton_method(rb_mBase, "NFKC", toNFKC, 1);
+  rb_define_singleton_method(rb_mBase, "NFKD", toNFKD, 1);
+  rb_define_singleton_method(rb_mBase, "NFKC_CF", toNFKC_CF, 1);
+  rb_define_singleton_method(rb_mBase, "normalize", norm, -1);
 }

data/lib/utf8_proc/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # frozen_string_literal: true
 module UTF8Proc
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end

data/utf8_proc.gemspec CHANGED Viewed

@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
   spec.authors       = ["Tim Bellefleur"]
   spec.email         = ["nomoon@phoebus.ca"]
-  spec.summary       = "Ruby Unicode library using utf8proc"
+  spec.summary       = "Unicode normalization library using utf8proc"
   spec.homepage      = "https://github.com/nomoon/utf8_proc"
   spec.license       = "MIT"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: utf8_proc
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Tim Bellefleur
@@ -113,7 +113,6 @@ files:
 - Rakefile
 - bin/console
 - bin/setup
-- ext/utf8_proc/.clang_complete
 - ext/utf8_proc/extconf.rb
 - ext/utf8_proc/utf8_proc.c
 - ext/utf8_proc/utf8_proc.h
@@ -143,5 +142,5 @@ rubyforge_project:
 rubygems_version: 2.6.10
 signing_key:
 specification_version: 4
-summary: Ruby Unicode library using utf8proc
+summary: Unicode normalization library using utf8proc
 test_files: []

data/ext/utf8_proc/.clang_complete DELETED Viewed

	@@ -1,2 +0,0 @@
1	- -I/Users/nomoon/.rvm/rubies/ruby-2.4.0/include/ruby-2.4.0/x86_64-darwin16
2	- -I/Users/nomoon/.rvm/rubies/ruby-2.4.0/include/ruby-2.4.0