RubyGems - sanscript - Versions diffs - 0.4.3 → 0.5.0 - Mend

sanscript 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/README.md +3 -1
data/lib/sanscript.rb +18 -3
data/lib/sanscript/benchmark.rb +43 -15
data/lib/sanscript/detect.rb +2 -0
data/lib/sanscript/exceptions.rb +19 -0
data/lib/sanscript/transliterate.rb +41 -40
data/lib/sanscript/transliterate/schemes.rb +80 -81
data/lib/sanscript/version.rb +1 -1
data/sanscript.gemspec +1 -1
metadata +5 -5
data/lib/sanscript/refinements.rb +0 -95

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 978f081a49e46e4cade8a7cdef95ebdb89f7daac
-  data.tar.gz: e46caf2df596e745398a6a5ac86d2c11ed0c666d
+  metadata.gz: 348f8d72cc3d76ba760a4225a4f784324294474a
+  data.tar.gz: f3a1215ad14dc3778795dc0f6563345aaa3015fe
 SHA512:
-  metadata.gz: e0f01c0322755e57c5690799660e810cc7d85271c5b6ef185ce21b13f516c396f1ae8e090529ecbfa8fd8a9bfcce8519f15d4d3291188d6bc1f83279c0b59071
-  data.tar.gz: f87954bc573383f2ddf530274ef69dbe32e67ab49d3f49e6db90790abafef51e51c8596c22d044ae40b4907921c38fc26182342f0c96008492aec938e56afbc9
+  metadata.gz: c8eae2315a8d3a68ce1a873585ab752902287a027a6d45961fef77ad0174d3646d7c7c36107a5879b5b8b36c10afef93428b34098b2ae86dfb6026bb4d644e94
+  data.tar.gz: 1997c7bb6d11f4b139eb5cde903bc17bffb786bcad236351100e55248d57945fbd19e7cf83f409af357c4c36d1dfb43fbb5adc5a0c2997e66622ed1576bb1921

data/README.md CHANGED

@@ -25,7 +25,9 @@ Or install it yourself as:
 ## Usage
-You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`. Code should be fairly straightforward and partially documented.
+You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`.
+Documentation is provided in YARD format and available online at [rubydoc.info](http://www.rubydoc.info/github/ubcsanskrit/sanscript.rb).
 ## Development

data/lib/sanscript.rb CHANGED

@@ -1,5 +1,8 @@
 # frozen_string_literal: true
+require "ragabash"
 require "sanscript/version"
+require "sanscript/exceptions"
 require "sanscript/detect"
 require "sanscript/transliterate"
 require "sanscript/benchmark"
@@ -21,23 +24,35 @@ module Sanscript
   #
   # @overload transliterate(text, from, to, **opts)
   #   @param text [String] the String to transliterate
-  #   @param from [Symbol] the name of the scheme to transliterate from
+  #   @param from [Symbol, nil] the name of the scheme to transliterate from, or nil to auto-detect the scheme
   #   @param to [Symbol] the name of the scheme to transliterate to
+  #   @option opts [Symbol] :default_scheme a default scheme to fall-back to if detection fails
   #   @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
   #   @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
   #   @return [String] the transliterated String
   #
+  #   @raise [DetectionError] if scheme detection and fallback fail
+  #   @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
+  #
   # @overload transliterate(text, to, **opts)
   #   @param text [String] the String to transliterate
   #   @param to [Symbol] the name of the scheme to transliterate to
   #   @option opts [Symbol] :default_scheme a default scheme to fall-back to if detection fails
   #   @option opts [Boolean] :skip_sgml (false) escape SGML-style tags in text string
   #   @option opts [Boolean] :syncope (false) activate Hindi-style schwa syncope
-  #   @return [String, nil] the transliterated String, or nil if detection and fallback fail
+  #   @return [String] the transliterated String
+  #
+  #   @raise [DetectionError] if scheme detection and fallback fail
+  #   @raise [SchemeNotSupportedError] if a provided transliteration scheme is not supported
+  #
   def transliterate(text, from, to = nil, **opts)
     if to.nil?
       to = from
-      from = Detect.detect_scheme(text) || opts[:default_scheme] || return
+      from = nil
+    end
+    if from.nil?
+      from = Detect.detect_scheme(text) || opts[:default_scheme] ||
+             raise(DetectionError, "String detection and fallback failed.")
     end
     Transliterate.transliterate(text, from, to, opts)
   end

data/lib/sanscript/benchmark.rb CHANGED

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
+#:nocov:
-require "sanscript/refinements"
 begin
   require "benchmark/ips"
 rescue LoadError
@@ -12,18 +12,17 @@ rescue LoadError
 end
 module Sanscript
-  using Refinements
   # Benchmark/testing module.
   module Benchmark
     module_function
     # Runs benchmark-ips test on detection methods.
     def detect!
-      iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
       deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
+      malayalam_string = "നാനാശാസ്ത്രസുഭാഷിതാമൃതരസൈഃ ശ്രോത്രോത്സവം കുര്വതാം യേഷാം യാന്തി ദിനാനി പണ്ഡിതജനവ്യായാമഖിന്നാത്മനാമ് തേഷാം ജന്മ ച ജീവിതം ച സുകൃതം തൈര് ഏവ ഭൂര് ഭൂഷിതാ ശേഷൈഹ് കിം പശുവദ് വിവേകരഹിതൈര് ഭൂഭാരഭൂതൈര് നരഃ"
+      iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
       slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
       hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
-      malayalam_string = "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ"
       ::Benchmark.ips do |x|
         x.config(time: 5, warmup: 1)
@@ -47,32 +46,61 @@ module Sanscript
       true
     end
-    # Runs benchmark-ips test on transliteration methods.
-    def transliterate!
+    # Runs benchmark-ips test on roman-source transliteration methods.
+    def transliterate_roman!
       iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
-      deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
       slp1_string = "nAnASAstrasuBAzitAmftarasEH SrotrotsavaM kurvatAM yezAM yAnti dinAni paRqitajanavyAyAmaKinnAtmanAm tezAM janma ca jIvitaM ca sukftaM tEr eva BUr BUzitA SezEh kiM paSuvad vivekarahitEr BUBAraBUtEr naraH"
+      hk_string = "nAnAzAstrasubhASitAmRtarasaiH zrotrotsavaM kurvatAM yeSAM yAnti dinAni paNDitajanavyAyAmakhinnAtmanAm teSAM janma ca jIvitaM ca sukRtaM tair eva bhUr bhUSitA zeSaih kiM pazuvad vivekarahitair bhUbhArabhUtair naraH"
       ::Benchmark.ips do |x|
-        x.config(time: 5, warmup: 2)
+        x.config(time: 3, warmup: 2)
         x.report("IAST==>Devanagari") do
-          raise unless Sanscript.transliterate(iast_string, :iast, :devanagari) == deva_string
+          Sanscript.transliterate(iast_string, :iast, :devanagari)
+        end
+        x.report("IAST==>SLP1") do
+          Sanscript.transliterate(iast_string, :iast, :slp1)
         end
         x.report("IAST==>SLP1") do
-          raise unless Sanscript.transliterate(iast_string, :iast, :slp1) == slp1_string
+          Sanscript.transliterate(iast_string, :iast, :hk)
         end
         x.report("SLP1==>Devanagari") do
-          raise unless Sanscript.transliterate(slp1_string, :slp1, :devanagari) == deva_string
+          Sanscript.transliterate(slp1_string, :slp1, :devanagari)
         end
         x.report("SLP1==>IAST") do
-          raise unless Sanscript.transliterate(slp1_string, :slp1, :iast) == iast_string
+          Sanscript.transliterate(slp1_string, :slp1, :iast)
         end
-        x.report("Devanagari==>SLP1") do
-          raise unless Sanscript.transliterate(deva_string, :devanagari, :slp1) == slp1_string
+        x.report("SLP1==>HK") do
+          Sanscript.transliterate(slp1_string, :slp1, :hk)
+        end
+        x.report("HK==>Devanagari") do
+          Sanscript.transliterate(hk_string, :hk, :devanagari)
         end
+        x.report("HK==>IAST") do
+          Sanscript.transliterate(hk_string, :hk, :iast)
+        end
+        x.report("HK==>SLP1") do
+          Sanscript.transliterate(hk_string, :hk, :slp1)
+        end
+        x.compare!
+      end
+      true
+    end
+    # Runs benchmark-ips test on brahmic-source transliteration methods.
+    def transliterate_brahmic!
+      deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"
+      ::Benchmark.ips do |x|
+        x.config(time: 5, warmup: 2)
         x.report("Devanagari==>IAST") do
-          raise unless Sanscript.transliterate(deva_string, :devanagari, :iast) == iast_string
+          Sanscript.transliterate(deva_string, :devanagari, :iast)
+        end
+        x.report("Devanagari==>SLP1") do
+          Sanscript.transliterate(deva_string, :devanagari, :slp1)
+        end
+        x.report("Devanagari==>HK") do
+          Sanscript.transliterate(deva_string, :devanagari, :hk)
         end
         x.compare!
       end

data/lib/sanscript/detect.rb CHANGED

@@ -61,6 +61,7 @@ module Sanscript
     #   @return [Symbol, nil] the Symbol of the scheme, or nil if no match
     # @!visibility private
+    # :nocov:
     if Regexp.method_defined?(:match?)
       require "sanscript/detect/ruby24"
       extend Ruby24
@@ -68,5 +69,6 @@ module Sanscript
       require "sanscript/detect/ruby2x"
       extend Ruby2x
     end
+    # :nocov:
   end
 end

data/lib/sanscript/exceptions.rb ADDED

@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+module Sanscript
+  using ::Ragabash::Refinements
+  # Error for when transliteration scheme is not supported.
+  class SchemeNotSupportedError < StandardError
+    def initialize(scheme = :unknown)
+      super(":#{scheme} is not supported.")
+    end
+  end
+  # Error for when scheme detection should non-silently fail
+  # (such as inside a transliteration method).
+  class DetectionError < StandardError
+    def initialize(message = "String detection failed.")
+      super
+    end
+  end
+end

data/lib/sanscript/transliterate.rb CHANGED

@@ -1,9 +1,8 @@
 # frozen_string_literal: true
-require "sanscript/refinements"
 require "sanscript/transliterate/schemes"
 module Sanscript
-  using Refinements
+  using ::Ragabash::Refinements
   # Sanskrit transliteration module.
   # Derived from Sanscript (https://github.com/sanskrit/sanscript.js), which is
   # released under the MIT and GPL Licenses.
@@ -144,8 +143,8 @@ module Sanscript
       from = from.to_sym
       to = to.to_sym
       return data if from == to
-      raise "Scheme not known ':#{from}'" unless @schemes.key?(from)
-      raise "Scheme not known ':#{to}'" unless @schemes.key?(to)
+      raise SchemeNotSupportedError, from unless @schemes.key?(from)
+      raise SchemeNotSupportedError, to unless @schemes.key?(to)
       data = data.to_str.dup
       options = @defaults.merge(opts)
@@ -229,39 +228,40 @@ module Sanscript
       # @param map [Hash] map data generated from {#make_map}
       # @return [String] the transliterated string
       def transliterate_roman(data, map, options = {})
-        data = data.to_str.dup
+        data = data.to_str.chars
         buf = []
-        token_buffer = String.new
+        token_buffer = []
         had_consonant = false
         transliteration_enabled = true
         control_char = false
+        max_token_length = map[:max_token_length]
         until data.empty? && token_buffer.empty?
-          token_buffer << data.slice!(0, map[:max_token_length] - token_buffer.length)
           # Match all token substrings to our map.
-          map[:max_token_length].downto(1) do |j|
-            token = token_buffer[0, j]
-            if !control_char && token == "##"
-              transliteration_enabled = !transliteration_enabled
-              token_buffer.slice!(0, 2)
-              break
-            elsif control_char && token == "#}"
-              transliteration_enabled = true
-              control_char = false
-              buf << token
-              token_buffer.slice!(0, 2)
-              break
-            elsif transliteration_enabled && token == "{#"
-              transliteration_enabled = false
-              control_char = true
-              buf << token
-              token_buffer.slice!(0, 2)
-              break
+          token = data[0, max_token_length].join("")
+          max_token_length.downto(1) do |j|
+            token = token[0, j] unless j == max_token_length
+            if j == 2
+              if !control_char && token == "##"
+                transliteration_enabled = !transliteration_enabled
+                data.shift(2)
+                break
+              elsif control_char && token == "#}"
+                transliteration_enabled = true
+                control_char = false
+                buf << token
+                data.shift(2)
+                break
+              elsif transliteration_enabled && token == "{#"
+                transliteration_enabled = false
+                control_char = true
+                buf << token
+                data.shift(2)
+                break
+              end
             end
-            temp_letter = map[:letters][token]
-            if !temp_letter.nil? && transliteration_enabled
+            if transliteration_enabled && (temp_letter = map[:letters][token])
               if map[:to_roman?]
                 buf << temp_letter
               else
@@ -269,18 +269,19 @@ module Sanscript
                 # vowels to appear as marks if we've just seen a
                 # consonant.
                 if had_consonant
-                  temp_mark = map[:marks][token]
-                  if !temp_mark.nil?
+                  # rubocop:disable Metrics/BlockNesting
+                  if (temp_mark = map[:marks][token])
                     buf << temp_mark
                   elsif token != "a"
-                    buf << map[:virama] << temp_letter
+                    buf.push(map[:virama], temp_letter)
                   end
+                  # rubocop:enable Metrics/BlockNesting
                 else
                   buf << temp_letter
                 end
                 had_consonant = map[:consonants].key?(token)
               end
-              token_buffer.slice!(0, j)
+              j > 1 ? data.shift(j) : data.shift
               break
             elsif j == 1 # Last iteration
               if had_consonant
@@ -288,7 +289,7 @@ module Sanscript
                 buf << map[:virama] unless options[:syncope]
               end
               buf << token
-              token_buffer.slice!(0, 1)
+              data.shift
             end
           end
         end
@@ -302,27 +303,27 @@ module Sanscript
       # @param map [Hash] map data generated from {#make_map}
       # @return [String] the transliterated string
       def transliterate_brahmic(data, map)
-        data = data.to_str.dup
+        data = data.to_str.chars
         buf = []
         had_roman_consonant = false
         transliteration_enabled = true
         control_char = false
         until data.empty?
-          token = data.slice(0, 2)
+          token = data[0, 2].join("")
           if !control_char && token == "##"
             if had_roman_consonant
               buf << "a" if transliteration_enabled
               had_roman_consonant = false
             end
             transliteration_enabled = !transliteration_enabled
-            data.slice!(0, 2)
+            data.shift(2)
             next
           elsif control_char && token == "#}"
             transliteration_enabled = true
             control_char = false
             buf << token
-            data.slice!(0, 2)
+            data.shift(2)
             next
           elsif transliteration_enabled && token == "{#"
             if had_roman_consonant
@@ -332,11 +333,11 @@ module Sanscript
             transliteration_enabled = false
             control_char = true
             buf << token
-            data.slice!(0, 2)
+            data.shift(2)
             next
           end
-          l = data.slice!(0, 1)
+          l = data.shift
           unless transliteration_enabled
             buf << l
             next

data/lib/sanscript/transliterate/schemes.rb CHANGED

@@ -1,8 +1,7 @@
 # frozen_string_literal: true
-require "sanscript/refinements"
 module Sanscript
-  using Refinements
+  using ::Ragabash::Refinements
   module Transliterate
     #  Schemes
     #  =======
@@ -25,13 +24,13 @@ module Sanscript
       # 'va' and 'ba' are both rendered as ব.
       #
       bengali: {
-        vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ  এ ঐ  ও ঔ".w_split,
-        vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ  ে ৈ  ো ৌ".w_split,
-        other_marks: "ং ঃ ঁ".w_split,
+        vowels: "অ আ ই ঈ উ ঊ ঋ ৠ ঌ ৡ  এ ঐ  ও ঔ".split(/\s/),
+        vowel_marks: "া ি ী ু ূ ৃ ৄ ৢ ৣ  ে ৈ  ো ৌ".split(/\s/),
+        other_marks: "ং ঃ ঁ".split(/\s/),
         virama: ["্"],
-        consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".w_split,
-        symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".w_split,
-        other: "    ড ঢ  য ".w_split,
+        consonants: "ক খ গ ঘ ঙ চ ছ জ ঝ ঞ ট ঠ ড ঢ ণ ত থ দ ধ ন প ফ ব ভ ম য র ল ব শ ষ স হ ळ ক্ষ জ্ঞ".split(/\s/),
+        symbols: "০ ১ ২ ৩ ৪ ৫ ৬ ৭ ৮ ৯ ॐ ঽ । ॥".split(/\s/),
+        other: "    ড ঢ  য ".split(/\s/),
       },
       # Devanagari
@@ -41,15 +40,15 @@ module Sanscript
       devanagari: {
         # "Independent" forms of the vowels. These are used whenever the
         # vowel does not immediately follow a consonant.
-        vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".w_split,
+        vowels: "अ आ इ ई उ ऊ ऋ ॠ ऌ ॡ ऎ ए ऐ ऒ ओ औ".split(/\s/),
         # "Dependent" forms of the vowels. These are used whenever the
         # vowel immediately follows a consonant. If a letter is not
         # listed in `vowels`, it should not be listed here.
-        vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".w_split,
+        vowel_marks: "ा ि ी ु ू ृ ॄ ॢ ॣ ॆ े ै ॊ ो ौ".split(/\s/),
         # Miscellaneous marks, all of which are used in Sanskrit.
-        other_marks: "ं ः ँ".w_split,
+        other_marks: "ं ः ँ".split(/\s/),
         # In syllabic scripts like Devanagari, consonants have an inherent
         # vowel that must be suppressed explicitly. We do so by putting a
@@ -58,10 +57,10 @@ module Sanscript
         # Various Sanskrit consonants and consonant clusters. Every token
         # here has an explicit vowel. Thus "क" is "ka" instead of "k".
-        consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".w_split,
+        consonants: "क ख ग घ ङ च छ ज झ ञ ट ठ ड ढ ण त थ द ध न प फ ब भ म य र ल व श ष स ह ळ क्ष ज्ञ".split(/\s/),
         # Numbers and punctuation
-        symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".w_split,
+        symbols: "० १ २ ३ ४ ५ ६ ७ ८ ९ ॐ ऽ । ॥".split(/\s/),
         # Zero-width joiner. This is used to separate a consonant cluster
         # and avoid a complex ligature.
@@ -76,12 +75,12 @@ module Sanscript
         # Accent combined with anusvara and visarga. For compatibility
         # with ITRANS, which allows the reverse of these four.
-        combo_accent: "ः॑ ः॒ ं॑ ं॒".w_split,
+        combo_accent: "ः॑ ः॒ ं॑ ं॒".split(/\s/),
         candra: ["ॅ"],
         # Non-Sanskrit consonants
-        other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".w_split,
+        other: "क़ ख़ ग़ ज़ ड़ ढ़ फ़ य़ ऱ".split(/\s/),
       },
       # Gujarati
@@ -89,12 +88,12 @@ module Sanscript
       # Sanskrit-complete.
       #
       gujarati: {
-        vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ  એ ઐ  ઓ ઔ".w_split,
-        vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ  ે ૈ  ો ૌ".w_split,
-        other_marks: "ં ઃ ઁ".w_split,
+        vowels: "અ આ ઇ ઈ ઉ ઊ ઋ ૠ ઌ ૡ  એ ઐ  ઓ ઔ".split(/\s/),
+        vowel_marks: "ા િ ી ુ ૂ ૃ ૄ ૢ ૣ  ે ૈ  ો ૌ".split(/\s/),
+        other_marks: "ં ઃ ઁ".split(/\s/),
         virama: ["્"],
-        consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".w_split,
-        symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ૤ ૥".w_split,
+        consonants: "ક ખ ગ ઘ ઙ ચ છ જ ઝ ઞ ટ ઠ ડ ઢ ણ ત થ દ ધ ન પ ફ બ ભ મ ય ર લ વ શ ષ સ હ ળ ક્ષ જ્ઞ".split(/\s/),
+        symbols: "૦ ૧ ૨ ૩ ૪ ૫ ૬ ૭ ૮ ૯ ૐ ઽ ૤ ૥".split(/\s/),
         candra: ["ૅ"],
       },
@@ -103,13 +102,13 @@ module Sanscript
       # Missing R/RR/lR/lRR
       #
       gurmukhi: {
-        vowels: "ਅ ਆ ਇ ਈ ਉ ਊ      ਏ ਐ  ਓ ਔ".w_split,
-        vowel_marks: "ਾ ਿ ੀ ੁ ੂ      ੇ ੈ  ੋ ੌ".w_split,
-        other_marks: "ਂ ਃ ਁ".w_split,
+        vowels: "ਅ ਆ ਇ ਈ ਉ ਊ      ਏ ਐ  ਓ ਔ".split(/\s/),
+        vowel_marks: "ਾ ਿ ੀ ੁ ੂ      ੇ ੈ  ੋ ੌ".split(/\s/),
+        other_marks: "ਂ ਃ ਁ".split(/\s/),
         virama: ["੍"],
-        consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".w_split,
-        symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".w_split,
-        other: " ਖ ਗ ਜ ਡ  ਫ  ".w_split,
+        consonants: "ਕ ਖ ਗ ਘ ਙ ਚ ਛ ਜ ਝ ਞ ਟ ਠ ਡ ਢ ਣ ਤ ਥ ਦ ਧ ਨ ਪ ਫ ਬ ਭ ਮ ਯ ਰ ਲ ਵ ਸ਼ ਸ਼ ਸ ਹ ਲ਼ ਕ੍ਸ਼ ਜ੍ਞ".split(/\s/),
+        symbols: "੦ ੧ ੨ ੩ ੪ ੫ ੬ ੭ ੮ ੯ ॐ ऽ । ॥".split(/\s/),
+        other: " ਖ ਗ ਜ ਡ  ਫ  ".split(/\s/),
       },
       # Kannada
@@ -117,13 +116,13 @@ module Sanscript
       # Sanskrit-complete.
       #
       kannada: {
-        vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".w_split,
-        vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".w_split,
-        other_marks: "ಂ ಃ ँ".w_split,
+        vowels: "ಅ ಆ ಇ ಈ ಉ ಊ ಋ ೠ ಌ ೡ ಎ ಏ ಐ ಒ ಓ ಔ".split(/\s/),
+        vowel_marks: "ಾ ಿ ೀ ು ೂ ೃ ೄ ೢ ೣ ೆ ೇ ೈ ೊ ೋ ೌ".split(/\s/),
+        other_marks: "ಂ ಃ ँ".split(/\s/),
         virama: ["್"],
-        consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".w_split,
-        symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".w_split,
-        other: "      ಫ  ಱ".w_split,
+        consonants: "ಕ ಖ ಗ ಘ ಙ ಚ ಛ ಜ ಝ ಞ ಟ ಠ ಡ ಢ ಣ ತ ಥ ದ ಧ ನ ಪ ಫ ಬ ಭ ಮ ಯ ರ ಲ ವ ಶ ಷ ಸ ಹ ಳ ಕ್ಷ ಜ್ಞ".split(/\s/),
+        symbols: "೦ ೧ ೨ ೩ ೪ ೫ ೬ ೭ ೮ ೯ ಓಂ ಽ । ॥".split(/\s/),
+        other: "      ಫ  ಱ".split(/\s/),
       },
       # Malayalam
@@ -131,13 +130,13 @@ module Sanscript
       # Sanskrit-complete.
       #
       malayalam: {
-        vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".w_split,
-        vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".w_split,
-        other_marks: "ം ഃ ँ".w_split,
+        vowels: "അ ആ ഇ ഈ ഉ ഊ ഋ ൠ ഌ ൡ എ ഏ ഐ ഒ ഓ ഔ".split(/\s/),
+        vowel_marks: "ാ ി ീ ു ൂ ൃ ൄ ൢ ൣ െ േ ൈ ൊ ോ ൌ".split(/\s/),
+        other_marks: "ം ഃ ँ".split(/\s/),
         virama: ["്"],
-        consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".w_split,
-        symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".w_split,
-        other: "        റ".w_split,
+        consonants: "ക ഖ ഗ ഘ ങ ച ഛ ജ ഝ ഞ ട ഠ ഡ ഢ ണ ത ഥ ദ ധ ന പ ഫ ബ ഭ മ യ ര ല വ ശ ഷ സ ഹ ള ക്ഷ ജ്ഞ".split(/\s/),
+        symbols: "൦ ൧ ൨ ൩ ൪ ൫ ൬ ൭ ൮ ൯ ഓം ഽ । ॥".split(/\s/),
+        other: "        റ".split(/\s/),
       },
       # Oriya
@@ -145,13 +144,13 @@ module Sanscript
       # Sanskrit-complete.
       #
       oriya: {
-        vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ  ଏ ଐ  ଓ ଔ".w_split,
-        vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ  େ ୈ  ୋ ୌ".w_split,
-        other_marks: "ଂ ଃ ଁ".w_split,
+        vowels: "ଅ ଆ ଇ ଈ ଉ ଊ ଋ ୠ ଌ ୡ  ଏ ଐ  ଓ ଔ".split(/\s/),
+        vowel_marks: "ା ି ୀ ୁ ୂ ୃ ୄ ୢ ୣ  େ ୈ  ୋ ୌ".split(/\s/),
+        other_marks: "ଂ ଃ ଁ".split(/\s/),
         virama: ["୍"],
-        consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".w_split,
-        symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".w_split,
-        other: "    ଡ ଢ  ଯ ".w_split,
+        consonants: "କ ଖ ଗ ଘ ଙ ଚ ଛ ଜ ଝ ଞ ଟ ଠ ଡ ଢ ଣ ତ ଥ ଦ ଧ ନ ପ ଫ ବ ଭ ମ ଯ ର ଲ ଵ ଶ ଷ ସ ହ ଳ କ୍ଷ ଜ୍ଞ".split(/\s/),
+        symbols: "୦ ୧ ୨ ୩ ୪ ୫ ୬ ୭ ୮ ୯ ଓଂ ଽ । ॥".split(/\s/),
+        other: "    ଡ ଢ  ଯ ".split(/\s/),
       },
       # Tamil
@@ -160,13 +159,13 @@ module Sanscript
       # The most incomplete of the Sanskrit schemes here.
       #
       tamil: {
-        vowels: "அ ஆ இ ஈ உ ஊ     எ ஏ ஐ ஒ ஓ ஔ".w_split,
-        vowel_marks: "ா ி ீ ு ூ     ெ ே ை ொ ோ ௌ".w_split,
-        other_marks: "ஂ ஃ ".w_split,
+        vowels: "அ ஆ இ ஈ உ ஊ     எ ஏ ஐ ஒ ஓ ஔ".split(/\s/),
+        vowel_marks: "ா ி ீ ு ூ     ெ ே ை ொ ோ ௌ".split(/\s/),
+        other_marks: "ஂ ஃ ".split(/\s/),
         virama: ["்"],
-        consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".w_split,
-        symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".w_split,
-        other: "        ற".w_split,
+        consonants: "க க க க ங ச ச ஜ ச ஞ ட ட ட ட ண த த த த ந ப ப ப ப ம ய ர ல வ ஶ ஷ ஸ ஹ ள க்ஷ ஜ்ஞ".split(/\s/),
+        symbols: "௦ ௧ ௨ ௩ ௪ ௫ ௬ ௭ ௮ ௯ ௐ ऽ । ॥".split(/\s/),
+        other: "        ற".split(/\s/),
       },
       # Telugu
@@ -174,13 +173,13 @@ module Sanscript
       # Sanskrit-complete.
       #
       telugu: {
-        vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".w_split,
-        vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".w_split,
-        other_marks: "ం ః ఁ".w_split,
+        vowels: "అ ఆ ఇ ఈ ఉ ఊ ఋ ౠ ఌ ౡ ఎ ఏ ఐ ఒ ఓ ఔ".split(/\s/),
+        vowel_marks: "ా ి ీ ు ూ ృ ౄ ౢ ౣ ె ే ై ొ ో ౌ".split(/\s/),
+        other_marks: "ం ః ఁ".split(/\s/),
         virama: ["్"],
-        consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".w_split,
-        symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".w_split,
-        other: "        ఱ".w_split,
+        consonants: "క ఖ గ ఘ ఙ చ ఛ జ ఝ ఞ ట ఠ డ ఢ ణ త థ ద ధ న ప ఫ బ భ మ య ర ల వ శ ష స హ ళ క్ష జ్ఞ".split(/\s/),
+        symbols: "౦ ౧ ౨ ౩ ౪ ౫ ౬ ౭ ౮ ౯ ఓం ఽ । ॥".split(/\s/),
+        other: "        ఱ".split(/\s/),
       },
       # International Alphabet of Sanskrit Transliteration
@@ -188,11 +187,11 @@ module Sanscript
       # The most "professional" Sanskrit romanization scheme.
       #
       iast: {
-        vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ  e ai  o au".w_split,
+        vowels: "a ā i ī u ū ṛ ṝ ḷ ḹ  e ai  o au".split(/\s/),
         other_marks: ["ṃ", "ḥ", "~"],
         virama: [""],
-        consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".w_split,
+        consonants: "k kh g gh ṅ c ch j jh ñ ṭ ṭh ḍ ḍh ṇ t th d dh n p ph b bh m y r l v ś ṣ s h ḻ kṣ jñ".split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 oṃ ' | ||".split(/\s/),
       },
       # ITRANS
@@ -204,17 +203,17 @@ module Sanscript
       # '_' is a "null" letter, which allows adjacent vowels.
       #
       itrans: {
-        vowels: "a A i I u U RRi RRI LLi LLI  e ai  o au".w_split,
+        vowels: "a A i I u U RRi RRI LLi LLI  e ai  o au".split(/\s/),
         other_marks: ["M", "H", ".N"],
         virama: [""],
-        consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".w_split,
+        consonants: "k kh g gh ~N ch Ch j jh ~n T Th D Dh N t th d dh n p ph b bh m y r l v sh Sh s h L kSh j~n".split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 OM .a | ||".split(/\s/),
         candra: [".c"],
         zwj: ["{}"],
         skip: ["_"],
         accent: ["\\'", "\\_"],
-        combo_accent: "\\'H \\_H \\'M \\_M".w_split,
-        other: "q K G z .D .Dh f Y R".w_split,
+        combo_accent: "\\'H \\_H \\'M \\_M".split(/\s/),
+        other: "q K G z .D .Dh f Y R".split(/\s/),
       },
       # Harvard-Kyoto
@@ -222,11 +221,11 @@ module Sanscript
       # A simple 1:1 mapping.
       #
       hk: {
-        vowels: "a A i I u U R RR lR lRR  e ai  o au".w_split,
-        other_marks: "M H ~".w_split,
+        vowels: "a A i I u U R RR lR lRR  e ai  o au".split(/\s/),
+        other_marks: "M H ~".split(/\s/),
         virama: [""],
-        consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".w_split,
+        consonants: "k kh g gh G c ch j jh J T Th D Dh N t th d dh n p ph b bh m y r l v z S s h L kS jJ".split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 OM ' | ||".split(/\s/),
       },
       # National Library at Kolkata
@@ -243,11 +242,11 @@ module Sanscript
       # scheme in use today and is especially suited to computer processing.
       #
       slp1: {
-        vowels: "a A i I u U f F x X  e E  o O".w_split,
-        other_marks: "M H ~".w_split,
+        vowels: "a A i I u U f F x X  e E  o O".split(/\s/),
+        other_marks: "M H ~".split(/\s/),
         virama: [""],
-        consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".w_split,
+        consonants: "k K g G N c C j J Y w W q Q R t T d D n p P b B m y r l v S z s h L kz jY".split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
       },
       # Velthuis
@@ -255,11 +254,11 @@ module Sanscript
       # A case-insensitive Sanskrit encoding.
       #
       velthuis: {
-        vowels: "a aa i ii u uu .r .rr .li .ll  e ai  o au".w_split,
-        other_marks: ".m .h ".w_split,
+        vowels: "a aa i ii u uu .r .rr .li .ll  e ai  o au".split(/\s/),
+        other_marks: ".m .h ".split(/\s/),
         virama: [""],
-        consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".w_split,
+        consonants: 'k kh g gh "n c ch j jh ~n .t .th .d .dh .n t th d dh n p ph b bh m y r l v ~s .s s h L k.s j~n'.split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 o.m ' | ||".split(/\s/),
       },
       # WX
@@ -267,11 +266,11 @@ module Sanscript
       # As terse as SLP1.
       #
       wx: {
-        vowels: "a A i I u U q Q L   e E  o O".w_split,
-        other_marks: "M H z".w_split,
+        vowels: "a A i I u U q Q L   e E  o O".split(/\s/),
+        other_marks: "M H z".split(/\s/),
         virama: [""],
-        consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h  kR jF".w_split,
-        symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".w_split,
+        consonants: "k K g G f c C j J F t T d D N w W x X n p P b B m y r l v S R s h  kR jF".split(/\s/),
+        symbols: "0 1 2 3 4 5 6 7 8 9 oM ' | ||".split(/\s/),
       },
     }
@@ -307,7 +306,7 @@ module Sanscript
         "\\_" => ["\\`"],
         "\\_H" => ["\\`H"],
         "\\'M" => ["\\'.m", "\\'.n"],
-        "\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".w_split,
+        "\\_M" => "\\_.m \\_.n \\`M \\`.m \\`.n".split(/\s/),
         ".a" => ["~"],
         "|" => ["."],
         "||" => [".."],

data/lib/sanscript/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Sanscript
   # The version number
-  VERSION = "0.4.3"
+  VERSION = "0.5.0"
 end

data/sanscript.gemspec CHANGED

@@ -29,5 +29,5 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "benchmark-ips", "~> 2.6"
   spec.add_development_dependency "yard", "~> 0.9"
-  spec.add_runtime_dependency "ice_nine", "~> 0.11"
+  spec.add_runtime_dependency "ragabash", "~> 0.1"
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sanscript
 version: !ruby/object:Gem::Version
-  version: 0.4.3
+  version: 0.5.0
 platform: ruby
 authors:
 - Tim Bellefleur
@@ -109,19 +109,19 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0.9'
 - !ruby/object:Gem::Dependency
-  name: ice_nine
+  name: ragabash
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.11'
+        version: '0.1'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.11'
+        version: '0.1'
 description:
 email:
 - nomoon@phoebus.ca
@@ -146,7 +146,7 @@ files:
 - lib/sanscript/detect.rb
 - lib/sanscript/detect/ruby24.rb
 - lib/sanscript/detect/ruby2x.rb
-- lib/sanscript/refinements.rb
+- lib/sanscript/exceptions.rb
 - lib/sanscript/transliterate.rb
 - lib/sanscript/transliterate/schemes.rb
 - lib/sanscript/version.rb

data/lib/sanscript/refinements.rb DELETED

@@ -1,95 +0,0 @@
-# frozen_string_literal: true
-require "ice_nine"
-module Sanscript
-  # A set of helpful refinements for duplication and deep freezing.
-  module Refinements
-    refine Object do
-      def deep_dup
-        dup
-      rescue TypeError
-        self
-      end
-      def deep_freeze
-        IceNine.deep_freeze(self)
-      end
-    end
-    refine NilClass do
-      def deep_dup
-        self
-      end
-    end
-    refine FalseClass do
-      def deep_dup
-        self
-      end
-    end
-    refine TrueClass do
-      def deep_dup
-        self
-      end
-    end
-    refine Symbol do
-      def deep_dup
-        self
-      end
-    end
-    refine Numeric do
-      def deep_dup
-        self
-      end
-    end
-    # Necessary to re-override Numeric
-    require "bigdecimal"
-    refine BigDecimal do
-      def deep_dup
-        dup
-      end
-    end
-    refine String do
-      def w_split
-        split(/\s/)
-      end
-    end
-    refine Array do
-      def deep_dup
-        map { |value| value.deep_dup } # rubocop:disable Style/SymbolProc
-      end
-    end
-    refine Hash do
-      def deep_dup
-        hash = dup
-        each_pair do |key, value|
-          if ::String === key # rubocop:disable Style/CaseEquality
-            hash[key] = value.deep_dup
-          else
-            hash.delete(key)
-            hash[key.deep_dup] = value.deep_dup
-          end
-        end
-        hash
-      end
-    end
-    refine Set do
-      def deep_dup
-        set_a = to_a
-        set_a.map! do |val|
-          next val if ::String === val # rubocop:disable Style/CaseEquality
-          val.deep_dup
-        end
-        self.class[set_a]
-      end
-    end
-  end
-end