RubyGems - hebrew - Versions diffs - 0.1.5 → 0.1.6 - Mend

hebrew 0.1.5 → 0.1.6

Files changed (3) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: cdc23d69ac6c99a5089730a3980986821a750c9b
-  data.tar.gz: b436b60e4093251483a256677308733488108d95
+  metadata.gz: db796d5a2e993a3e80c0533e59be71ddea35ff77
+  data.tar.gz: 907c43cec960cd4080f81084d555e627174e71cc
 SHA512:
-  metadata.gz: 4bebdf8bb10c1101b93811a36d2b21ac12c76068040e02073e35bd5570b084c802c8cc4ecf0bbf23bc5663bb00c8a3ecfe67b9db9d1b9adc2ca124534c88e5b8
-  data.tar.gz: c66e39755a562a5ac674deba0d678c5e115a74fbb16e12d2fdb299deeab608433168a5460d62f5e8e9401510e3f1735e97adf3bb7c89a7dc382e7717547e9b39
+  metadata.gz: c3d0e503b1ad747277dd6aa9ed98fcdf1ea32838bfd396f00657fb834f6715454d81a8bd23f66c0196085f62b763e6f2504c1bf0765f1f685958db92974e1851
+  data.tar.gz: 3c5221280f64cc1338f10ecf32fe73f3ce2bed78963d67c575179eb286eb64d7eed1e72c7bb84da2d89f75b0f55be8486f06cbd5d45346017e0e7d93f53ca595

data/lib/hebrew.rb CHANGED Viewed

@@ -4,17 +4,18 @@
 #
 # codepoints for CP1255 nikkud
-NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 209, 210]
+NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 209, 210]
 #NIKKUD_CP1255 = ["\xc0".force_encoding('windows-1255'), "\xc1".force_encoding('windows-1255'), "\xc2".force_encoding('windows-1255'), "\xc3".force_encoding('windows-1255'), "\xc4".force_encoding('windows-1255'), "\xc5".force_encoding('windows-1255'), "\xc6".force_encoding('windows-1255'), "\xc7".force_encoding('windows-1255'), "\xc8".force_encoding('windows-1255'), "\xc9".force_encoding('windows-1255'), "\xcb".force_encoding('windows-1255'), "\xcc".force_encoding('windows-1255'), "\xd1".force_encoding('windows-1255'), "\xd2".force_encoding('windows-1255')] # wow, this is fugly.  Is there a neater way to specify CP1255 literal?
-NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05c1, 0x05c2]
+NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05bd, 0x05bf, 0x05c1, 0x05c2]
 #NIKKUD_UTF8 = ["\u05b0", "\u05b1", "\u05b2", "\u05b3", "\u05b4", "\u05b5", "\u05b6", "\u05b7", "\u05b8", "\u05b9", "\u05bb", "\u05bc", "\u05c1", "\u05c2"]
 # TODO: Mac encoding
 FIANLS_CP1255 = ["\xea".force_encoding('windows-1255'), "\xed".force_encoding('windows-1255'), "\xef".force_encoding('windows-1255'), "\xf3".force_encoding('windows-1255'), "\xf5".force_encoding('windows-1255')]
-FINALS_UTF8 = []
+FINALS_UTF8 = ["\u05da", "\u05dd", "\u05df", "\u05e3", "\u05e5"]
 HEB_UTF8_START = 1424
 HEB_UTF8_END = 1535
 # extend String class
 class String
   # this will return the string, stripped of any Hebrew nikkud characters
@@ -44,6 +45,7 @@ class String
     }
     return target
   end
+  # this will return true if the string contains any Hebrew character (short circuit)
   def any_hebrew?
     case self.encoding
     when Encoding::UTF_8
@@ -56,6 +58,10 @@ class String
       return false
     end
   end
+  def is_hebrew_codepoint_cp1255(cp)
+    if (cp > 191 && cp < 202) or [203, 204, 209, 210].include?(cp)
+  end
   def is_hebrew_codepoint_utf8(cp)
     if cp >= HEB_UTF8_START && cp <= HEB_UTF8_END
       return true
@@ -63,27 +69,33 @@ class String
       return false
     end
   end
   # TODO: add strip_nikkud!
+  # this will return true if the parameter is a nikkud character
   def is_nikkud(c)
     self.class.is_nikkud_by_encoding(c, self.encoding) # delegate to class method based on instance encoding
   end
   def self.is_codepoint_nikkud_cp1255(cp)
-    NIKKUD_CP1255.include?(cp)
+    return (cp > 191 && cp < 205) or [209, 210].include?(cp)
+    #NIKKUD_CP1255.include?(cp) # cleaner, but much slower
   end
   def self.is_codepoint_nikkud_utf8(cp)
-    NIKKUD_UTF8.include?(cp)
+    return (cp > 0x05af && cp < 0x05ba) or [0x05bb, 0x05bc, 0x05c1, 0x05c2].include?(cp)
+    #NIKKUD_UTF8.include?(cp) # cleaner, but much slower
   end
+  # this will return true if the first parameter is a nikkud character in the encoding of the second parameter
   def self.is_nikkud_by_encoding(c, encoding)
     case encoding
     when Encoding::UTF_8
-      # DBG: puts "utf8 - #{c} - #{c.codepoints.first}"
-      NIKKUD_UTF8.include?(c)
+      self.is_codepoint_nikkud_utf8(c.codepoints.first)
     when Encoding::WINDOWS_1255 || Encoding::CP1255
-      # DBG: puts "cp1255 - #{c} - #{c.codepoints.first}"
-      NIKKUD_CP1255.include?(c)
+      self.is_codepoint_nikkud_cp1255(c.codepoints.first)
     # TODO: add Mac encoding?
     end
   end
+  # this will return true if the first parameter is a final letter in the encoding of the second parameter
   def self.is_final_by_encoding(c, encoding)
     case encoding
     when Encoding::UTF_8

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hebrew
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.6
 platform: ruby
 authors:
 - Asaf Bartov
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-02-18 00:00:00.000000000 Z
+date: 2014-04-02 00:00:00.000000000 Z
 dependencies: []
 description: Some useful code to identify, transcode, and manipulate Hebrew text
 email: asaf.bartov@gmail.com
@@ -17,7 +17,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/hebrew.rb
-homepage: http://rubygems.org/gems/hebrew
+homepage: https://github.com/abartov/hebrew
 licenses:
 - MIT
 metadata: {}