RubyGems - utf8_proc - Versions diffs - 0.5.2 → 0.6.0 - Mend

utf8_proc 0.5.2 → 0.6.0

Files changed (11) hide show

checksums.yaml +4 -4
data/README.md +2 -0
data/ext/utf8_proc/utf8_proc.c +98 -12
data/lib/utf8_proc.rb +10 -0
data/lib/utf8_proc/benchmark.rb +4 -2
data/lib/utf8_proc/core_ext/string.rb +18 -5
data/lib/utf8_proc/core_ext/string_jruby.rb +46 -23
data/lib/utf8_proc/jruby.rb +12 -0
data/lib/utf8_proc/version.rb +2 -1
data/utf8_proc.gemspec +3 -2
metadata +23 -9

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ef828b3bc3e2413f1ef0df107c8924eaa84cac4a
-  data.tar.gz: c4fe505d4cdb442a921df2bc3bd2d1bd0a08d08b
+  metadata.gz: 33591974889df9c707aed4f9cb97b04f623736c1
+  data.tar.gz: c7d78939a0a7b9a7f4c5c5f74047aa05a75eee61
 SHA512:
-  metadata.gz: ea8ec35886cf54a9ce7047ba26e66a0904d4171a26a2697a656dc4d746c3509a6123e7e926beb27f26960bd5d218ade3377b0c38efcc2cafb1acf2ce5ea612d6
-  data.tar.gz: 886eb31311b4d4f5559632d0347cd81ab6378d62285a958f0d2a2fcc193ae7f82aeefe945a192eee4d77d56ebdffa68426a0e4993890e83eec2bb305da3dbb5c
+  metadata.gz: 62c02a23182fe04aae49a257b8a5d40a7b344d3ea7ffb3070cd763a8224be9e045eb427eab04d7ed5e85a20e02e906f3f610a6f46404abcdb6d4dd5e08c4d8e0
+  data.tar.gz: 9ec79b0312bb78e1e0e3575432c52024d4cce0456cfd009bf2a8cf8acca196e8a53d7cd1012ee54444d0887425c6f2641fa86efe91a1e85207c9e1e476cba6a4

data/README.md CHANGED

@@ -28,6 +28,8 @@ Or install it yourself as:
 ## Usage
+YARD documentation is available at [rubydoc.info](http://www.rubydoc.info/github/nomoon/utf8_proc)
 ```ruby
 require "utf8_proc"

data/ext/utf8_proc/utf8_proc.c CHANGED

@@ -65,58 +65,133 @@ static inline VALUE normInternal(VALUE *string, utf8proc_option_t options) {
   return new_str;
 }
-// NFC
+/**
+ * Normalizes a String using NFC (Canonical Decomposition, followed by Canonical
+ * Composition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFC(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
 }
+/**
+ * Normalizes self using NFC (Canonical Decomposition, followed by Canonical
+ * Composition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFC(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
 }
-// NFD
+/**
+ * Normalizes a string using NFD (Canonical Decomposition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFD(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
 }
+/**
+ * Normalizes self using NFD (Canonical Decomposition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFD(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
 }
-// NFKC
+/**
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKC(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
 }
+/**
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKC(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
 }
-// NFKD
+/**
+ * Normalizes a string using NFKD (Compatibility Decomposition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKD(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
 }
+/**
+ * Normalizes self using NFKD (Compatibility Decomposition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKD(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
 }
-// NFKC_CF
+/**
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition) with case-folding
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKC_CF(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
 }
+/**
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition) with case-folding
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKC_CF(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
 }
-// Parameterized normalization
+/**
+ * @overload normalize(string, form = :nfc)
+ *   Normalizes a string according to one of the 5 possible forms
+ *
+ *   @param string [String] the String to normalize
+ *   @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
+ *
+ *   @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ *   @raise [ArgumentError] if *form* is not one of the 5 valid forms
+ *   @return [String] a normalized string
+ */
 static VALUE toNorm(int argc, VALUE* argv, VALUE self){
   VALUE string;
   VALUE form;
@@ -145,6 +220,16 @@ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
   }
 }
+/**
+ * @overload normalize(form = :nfc)
+ *   Normalizes self according to one of the 5 possible forms
+ *
+ *   @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
+ *
+ *   @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ *   @raise [ArgumentError] if *form* is not one of the 5 valid forms
+ *   @return [String] a normalized copy of the string
+ */
 static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
   VALUE form;
   rb_scan_args(argc, argv, "01", &form);
@@ -186,6 +271,7 @@ void Init_utf8_proc(void) {
   const char *libVersion;
   libVersion = utf8proc_version();
+  // Displays the library version of the utf8proc library
   rb_define_const(rb_mBase, "LIBRARY_VERSION", rb_str_freeze(
     rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8)
   ));

data/lib/utf8_proc.rb CHANGED

@@ -2,6 +2,7 @@
 require "utf8_proc/version"
 require "utf8_proc/benchmark"
+# Unicode string normalization library using UTF8Proc
 module UTF8Proc
   if RUBY_ENGINE == "jruby"
     require "utf8_proc/jruby"
@@ -9,4 +10,13 @@ module UTF8Proc
   else
     require "utf8_proc/utf8_proc"
   end
+  # Add lowercase name aliases for normalization methods
+  class << self
+    alias nfc NFC
+    alias nfd NFD
+    alias nfkc NFKC
+    alias nfkd NFKD
+    alias nfkc_cf NFKC_CF
+  end
 end

data/lib/utf8_proc/benchmark.rb CHANGED

@@ -1,10 +1,12 @@
 # frozen_string_literal: true
-# rubocop:disable MethodLength
 module UTF8Proc
+  # Benchmark module for comparing the speed of *UTF8Proc* and *UNF*
   module Benchmark
     module_function
-    def run
+    # Runs the benchmark and displays the results.
+    def run # rubocop:disable MethodLength
       require "benchmark/ips"
       require "unf"
       # Various different normalizations of Unicode characters.

data/lib/utf8_proc/core_ext/string.rb CHANGED

@@ -2,10 +2,23 @@
 require "utf8_proc"
-class String
-  if RUBY_ENGINE == "jruby"
-    require "utf8_proc/core_ext/string_jruby"
-  else
-    include ::UTF8Proc::StringExtension
+module UTF8Proc
+  # Module containing C core extension methods for the {::String} class.
+  #
+  # You can activate this by using:
+  #   require "utf8_proc/core_ext/string"
+  #
+  # It will load either the C or the Java extension, depending on whether you are running JRuby.
+  module StringExtension
+    if RUBY_ENGINE == "jruby"
+      require "utf8_proc/core_ext/string_jruby"
+    else
+      alias nfc NFC
+      alias nfd NFD
+      alias nfkc NFKC
+      alias nfkd NFKD
+      alias nfkc_cf NFKC_CF
+      String.send(:include, ::UTF8Proc::StringExtension)
+    end
   end
 end

data/lib/utf8_proc/core_ext/string_jruby.rb CHANGED

@@ -6,28 +6,51 @@
 require "java"
 require "utf8_proc"
-class String
-  def NFC
-    ::UTF8Proc.NFC(self)
-  end
-  def NFD
-    ::UTF8Proc.NFD(self)
-  end
-  def NFKC
-    ::UTF8Proc.NFKC(self)
-  end
-  def NFKD
-    ::UTF8Proc.NFKD(self)
-  end
-  def NFKC_CF
-    ::UTF8Proc.NFKC_CF(self)
-  end
-  def normalize(form = :nfc)
-    ::UTF8Proc.normalize(self, form)
+module UTF8Proc
+  module JRuby
+    # Module containing JRuby core extension methods for the {::String} class.
+    #
+    # You can activate this by using:
+    #   require "utf8_proc/core_ext/string"
+    #
+    # It will load either the C or the Java extension, depending on whether you are running JRuby.
+    module StringExtension
+      # @see UTF8Proc::StringExtension#NFC
+      def NFC
+        ::UTF8Proc.NFC(self)
+      end
+      alias nfc NFC
+      # @see UTF8Proc::StringExtension#NFD
+      def NFD
+        ::UTF8Proc.NFD(self)
+      end
+      alias nfd NFD
+      # @see UTF8Proc::StringExtension#NFKC
+      def NFKC
+        ::UTF8Proc.NFKC(self)
+      end
+      alias nfkc NFKC
+      # @see UTF8Proc::StringExtension#NFKD
+      def NFKD
+        ::UTF8Proc.NFKD(self)
+      end
+      alias nfkd NFKD
+      # @see UTF8Proc::StringExtension#NFKC_CF
+      def NFKC_CF
+        ::UTF8Proc.NFKC_CF(self)
+      end
+      alias nfkc_cf NFKC_CF
+      # @see UTF8Proc::StringExtension#normalize
+      def normalize(form = :nfc)
+        ::UTF8Proc.normalize(self, form)
+      end
+    end
   end
 end
+String.send(:include, ::UTF8Proc::JRuby::StringExtension)

data/lib/utf8_proc/jruby.rb CHANGED

@@ -6,37 +6,49 @@
 require "java"
 module UTF8Proc
+  # JRuby normalization module.
+  #
+  # This module is loaded automatically when running on JRuby.
   module JRuby
+    # Displays your version of the Java VM
     LIBRARY_VERSION = "Java #{ENV_JAVA['java.version']}".freeze
     JTNORM = java.text.Normalizer
     private_constant :JTNORM
+    # @!visibility private
     def self.included(receiver)
       receiver.extend(ClassMethods)
     end
+    # Methods added to the {::UTF8Proc} module in JRuby (instead of the C ones)
     module ClassMethods
+      # @see UTF8Proc.NFC
       def NFC(string)
         JTNORM.normalize(string, JTNORM::Form::NFC)
       end
+      # @see UTF8Proc.NFD
       def NFD(string)
         JTNORM.normalize(string, JTNORM::Form::NFD)
       end
+      # @see UTF8Proc.NFKC
       def NFKC(string)
         JTNORM.normalize(string, JTNORM::Form::NFKC)
       end
+      # @see UTF8Proc.NFKD
       def NFKD(string)
         JTNORM.normalize(string, JTNORM::Form::NFKD)
       end
+      # @see UTF8Proc.NFKC_CF
       def NFKC_CF(string)
         NFKC(string).to_java(:string).toLowerCase
       end
+      # @see UTF8Proc.normalize
       def normalize(string, form = :nfc)
         case form
         when :nfc

data/lib/utf8_proc/version.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 module UTF8Proc
-  VERSION = "0.5.2".freeze
+  # The gem version
+  VERSION = "0.6.0".freeze
 end

data/utf8_proc.gemspec CHANGED

@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "pry", "~> 0.10"
   spec.add_development_dependency "minitest", "~> 5.10"
   spec.add_development_dependency "rubocop", "~> 0.47"
-  spec.add_development_dependency "benchmark-ips"
-  spec.add_development_dependency "unf"
+  spec.add_development_dependency "yard", "~> 0.9"
+  spec.add_development_dependency "benchmark-ips", "~> 2.7"
+  spec.add_development_dependency "unf", "~> 0.1"
   unless RUBY_ENGINE == "jruby"
     spec.extensions = ["ext/utf8_proc/extconf.rb"]

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: utf8_proc
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.6.0
 platform: ruby
 authors:
 - Tim Bellefleur
@@ -80,34 +80,48 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.47'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
 - !ruby/object:Gem::Dependency
   name: benchmark-ips
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '2.7'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '2.7'
 - !ruby/object:Gem::Dependency
   name: unf
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '0.1'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '0.1'
 - !ruby/object:Gem::Dependency
   name: rake-compiler
   requirement: !ruby/object:Gem::Requirement