RubyGems - utf8_proc - Versions diffs - 0.5.2 → 0.6.0 - Mend

utf8_proc 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/README.md +2 -0
data/ext/utf8_proc/utf8_proc.c +98 -12
data/lib/utf8_proc.rb +10 -0
data/lib/utf8_proc/benchmark.rb +4 -2
data/lib/utf8_proc/core_ext/string.rb +18 -5
data/lib/utf8_proc/core_ext/string_jruby.rb +46 -23
data/lib/utf8_proc/jruby.rb +12 -0
data/lib/utf8_proc/version.rb +2 -1
data/utf8_proc.gemspec +3 -2
metadata +23 -9

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ef828b3bc3e2413f1ef0df107c8924eaa84cac4a
-  data.tar.gz: c4fe505d4cdb442a921df2bc3bd2d1bd0a08d08b
+  metadata.gz: 33591974889df9c707aed4f9cb97b04f623736c1
+  data.tar.gz: c7d78939a0a7b9a7f4c5c5f74047aa05a75eee61
 SHA512:
-  metadata.gz: ea8ec35886cf54a9ce7047ba26e66a0904d4171a26a2697a656dc4d746c3509a6123e7e926beb27f26960bd5d218ade3377b0c38efcc2cafb1acf2ce5ea612d6
-  data.tar.gz: 886eb31311b4d4f5559632d0347cd81ab6378d62285a958f0d2a2fcc193ae7f82aeefe945a192eee4d77d56ebdffa68426a0e4993890e83eec2bb305da3dbb5c
+  metadata.gz: 62c02a23182fe04aae49a257b8a5d40a7b344d3ea7ffb3070cd763a8224be9e045eb427eab04d7ed5e85a20e02e906f3f610a6f46404abcdb6d4dd5e08c4d8e0
+  data.tar.gz: 9ec79b0312bb78e1e0e3575432c52024d4cce0456cfd009bf2a8cf8acca196e8a53d7cd1012ee54444d0887425c6f2641fa86efe91a1e85207c9e1e476cba6a4

data/README.md CHANGED

@@ -28,6 +28,8 @@ Or install it yourself as:
 ## Usage
+YARD documentation is available at [rubydoc.info](http://www.rubydoc.info/github/nomoon/utf8_proc)
 ```ruby
 require "utf8_proc"

data/ext/utf8_proc/utf8_proc.c CHANGED

@@ -65,58 +65,133 @@ static inline VALUE normInternal(VALUE *string, utf8proc_option_t options) {
   return new_str;
 }
-// NFC
+/**
+ * Normalizes a String using NFC (Canonical Decomposition, followed by Canonical
+ * Composition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFC(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
 }
+/**
+ * Normalizes self using NFC (Canonical Decomposition, followed by Canonical
+ * Composition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFC(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
 }
-// NFD
+/**
+ * Normalizes a string using NFD (Canonical Decomposition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFD(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
 }
+/**
+ * Normalizes self using NFD (Canonical Decomposition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFD(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE);
 }
-// NFKC
+/**
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKC(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
 }
+/**
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKC(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
 }
-// NFKD
+/**
+ * Normalizes a string using NFKD (Compatibility Decomposition)
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKD(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
 }
+/**
+ * Normalizes self using NFKD (Compatibility Decomposition)
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKD(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT);
 }
-// NFKC_CF
+/**
+ * Normalizes a string using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition) with case-folding
+ *
+ * @param string [String] the String to normalize
+ *
+ * @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized string
+ */
 static VALUE toNFKC_CF(VALUE self, VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
 }
+/**
+ * Normalizes self using NFKC (Compatibility Decomposition, followed by
+ * Canonical Composition) with case-folding
+ *
+ * @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ * @return [String] a normalized copy of the string
+ */
 static VALUE StoNFKC_CF(VALUE string) {
   return normInternal(&string, UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD);
 }
-// Parameterized normalization
+/**
+ * @overload normalize(string, form = :nfc)
+ *   Normalizes a string according to one of the 5 possible forms
+ *
+ *   @param string [String] the String to normalize
+ *   @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
+ *
+ *   @raise [EncodingError] if *string* is not encoded in *UTF-8* or *US-ASCII*
+ *   @raise [ArgumentError] if *form* is not one of the 5 valid forms
+ *   @return [String] a normalized string
+ */
 static VALUE toNorm(int argc, VALUE* argv, VALUE self){
   VALUE string;
   VALUE form;
@@ -145,6 +220,16 @@ static VALUE toNorm(int argc, VALUE* argv, VALUE self){
   }
 }
+/**
+ * @overload normalize(form = :nfc)
+ *   Normalizes self according to one of the 5 possible forms
+ *
+ *   @param form [:nfc, :nfd, :nfkc, :nfkd, :nfkc_cf] the normalization form
+ *
+ *   @raise [EncodingError] if *self* is not encoded in *UTF-8* or *US-ASCII*
+ *   @raise [ArgumentError] if *form* is not one of the 5 valid forms
+ *   @return [String] a normalized copy of the string
+ */
 static VALUE StoNorm(int argc, VALUE* argv, VALUE string){
   VALUE form;
   rb_scan_args(argc, argv, "01", &form);
@@ -186,6 +271,7 @@ void Init_utf8_proc(void) {
   const char *libVersion;
   libVersion = utf8proc_version();
+  // The version string reported by the underlying utf8proc library
   rb_define_const(rb_mBase, "LIBRARY_VERSION", rb_str_freeze(
     rb_enc_str_new(libVersion, strlen(libVersion), enc_utf8)
   ));

data/lib/utf8_proc.rb CHANGED

@@ -2,6 +2,7 @@
 require "utf8_proc/version"
 require "utf8_proc/benchmark"
+# Unicode string normalization library using UTF8Proc
 module UTF8Proc
   if RUBY_ENGINE == "jruby"
     require "utf8_proc/jruby"
@@ -9,4 +10,13 @@ module UTF8Proc
   else
     require "utf8_proc/utf8_proc"
   end
+  # Add lowercase name aliases for normalization methods
+  class << self
+    alias nfc NFC
+    alias nfd NFD
+    alias nfkc NFKC
+    alias nfkd NFKD
+    alias nfkc_cf NFKC_CF
+  end
 end

data/lib/utf8_proc/benchmark.rb CHANGED

@@ -1,10 +1,12 @@
 # frozen_string_literal: true
-# rubocop:disable MethodLength
 module UTF8Proc
+  # Benchmark module for comparing the speed of *UTF8Proc* and *UNF*
   module Benchmark
     module_function
-    def run
+    # Runs the benchmark and displays the results.
+    def run # rubocop:disable MethodLength
       require "benchmark/ips"
       require "unf"
       # Various different normalizations of Unicode characters.

data/lib/utf8_proc/core_ext/string.rb CHANGED

@@ -2,10 +2,23 @@
 require "utf8_proc"
-class String
-  if RUBY_ENGINE == "jruby"
-    require "utf8_proc/core_ext/string_jruby"
-  else
-    include ::UTF8Proc::StringExtension
+module UTF8Proc
+  # Module containing C core extension methods for the {::String} class.
+  #
+  # You can activate this by using:
+  #   require "utf8_proc/core_ext/string"
+  #
+  # It will load either the C or the Java extensions, depending on whether you are running JRuby.
+  module StringExtension
+    if RUBY_ENGINE == "jruby"
+      require "utf8_proc/core_ext/string_jruby"
+    else
+      alias nfc NFC
+      alias nfd NFD
+      alias nfkc NFKC
+      alias nfkd NFKD
+      alias nfkc_cf NFKC_CF
+      String.send(:include, ::UTF8Proc::StringExtension)
+    end
   end
 end

data/lib/utf8_proc/core_ext/string_jruby.rb CHANGED

@@ -6,28 +6,51 @@
 require "java"
 require "utf8_proc"
-class String
-  def NFC
-    ::UTF8Proc.NFC(self)
-  end
-  def NFD
-    ::UTF8Proc.NFD(self)
-  end
-  def NFKC
-    ::UTF8Proc.NFKC(self)
-  end
-  def NFKD
-    ::UTF8Proc.NFKD(self)
-  end
-  def NFKC_CF
-    ::UTF8Proc.NFKC_CF(self)
-  end
-  def normalize(form = :nfc)
-    ::UTF8Proc.normalize(self, form)
+module UTF8Proc
+  module JRuby
+    # Module containing JRuby core extension methods for the {::String} class.
+    #
+    # You can activate this by using:
+    #   require "utf8_proc/core_ext/string"
+    #
+    # It will load either the C or the Java extensions, depending on whether you are running JRuby.
+    module StringExtension
+      # @see UTF8Proc::StringExtension#NFC
+      def NFC
+        ::UTF8Proc.NFC(self)
+      end
+      alias nfc NFC
+      # @see UTF8Proc::StringExtension#NFD
+      def NFD
+        ::UTF8Proc.NFD(self)
+      end
+      alias nfd NFD
+      # @see UTF8Proc::StringExtension#NFKC
+      def NFKC
+        ::UTF8Proc.NFKC(self)
+      end
+      alias nfkc NFKC
+      # @see UTF8Proc::StringExtension#NFKD
+      def NFKD
+        ::UTF8Proc.NFKD(self)
+      end
+      alias nfkd NFKD
+      # @see UTF8Proc::StringExtension#NFKC_CF
+      def NFKC_CF
+        ::UTF8Proc.NFKC_CF(self)
+      end
+      alias nfkc_cf NFKC_CF
+      # @see UTF8Proc::StringExtension#normalize
+      def normalize(form = :nfc)
+        ::UTF8Proc.normalize(self, form)
+      end
+    end
   end
 end
+String.send(:include, ::UTF8Proc::JRuby::StringExtension)

data/lib/utf8_proc/jruby.rb CHANGED

@@ -6,37 +6,49 @@
 require "java"
 module UTF8Proc
+  # JRuby normalization module.
+  #
+  # This module is loaded automatically when running on JRuby.
   module JRuby
+    # Displays your version of the Java VM
     LIBRARY_VERSION = "Java #{ENV_JAVA['java.version']}".freeze
     JTNORM = java.text.Normalizer
     private_constant :JTNORM
+    # @!visibility private
     def self.included(receiver)
       receiver.extend(ClassMethods)
     end
+    # Methods added to the {::UTF8Proc} module in JRuby (instead of the C ones)
     module ClassMethods
+      # @see UTF8Proc.NFC
       def NFC(string)
         JTNORM.normalize(string, JTNORM::Form::NFC)
       end
+      # @see UTF8Proc.NFD
       def NFD(string)
         JTNORM.normalize(string, JTNORM::Form::NFD)
       end
+      # @see UTF8Proc.NFKC
       def NFKC(string)
         JTNORM.normalize(string, JTNORM::Form::NFKC)
       end
+      # @see UTF8Proc.NFKD
       def NFKD(string)
         JTNORM.normalize(string, JTNORM::Form::NFKD)
       end
+      # @see UTF8Proc.NFKC_CF
       def NFKC_CF(string)
         NFKC(string).to_java(:string).toLowerCase
       end
+      # @see UTF8Proc.normalize
       def normalize(string, form = :nfc)
         case form
         when :nfc

data/lib/utf8_proc/version.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 module UTF8Proc
-  VERSION = "0.5.2".freeze
+  # The gem version
+  VERSION = "0.6.0".freeze
 end

data/utf8_proc.gemspec CHANGED

@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "pry", "~> 0.10"
   spec.add_development_dependency "minitest", "~> 5.10"
   spec.add_development_dependency "rubocop", "~> 0.47"
-  spec.add_development_dependency "benchmark-ips"
-  spec.add_development_dependency "unf"
+  spec.add_development_dependency "yard", "~> 0.9"
+  spec.add_development_dependency "benchmark-ips", "~> 2.7"
+  spec.add_development_dependency "unf", "~> 0.1"
   unless RUBY_ENGINE == "jruby"
     spec.extensions = ["ext/utf8_proc/extconf.rb"]

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: utf8_proc
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.6.0
 platform: ruby
 authors:
 - Tim Bellefleur
@@ -80,34 +80,48 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.47'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
 - !ruby/object:Gem::Dependency
   name: benchmark-ips
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '2.7'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '2.7'
 - !ruby/object:Gem::Dependency
   name: unf
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '0.1'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '0.1'
 - !ruby/object:Gem::Dependency
   name: rake-compiler
   requirement: !ruby/object:Gem::Requirement