hebrew 0.0.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +6 -14
- data/lib/hebrew.rb +43 -6
- metadata +5 -5
    
        checksums.yaml
    CHANGED
    
    | @@ -1,15 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 | 
            -
             | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
              metadata.gz: !binary |-
         | 
| 9 | 
            -
                MzcxYjMzODk1MWZlM2VkMWE4OGZjYjczNzQ4YmIyY2IzMzc2NzI3MDBjNGI1
         | 
| 10 | 
            -
                OWFhZDFhYzdiZWExMTczNzAwNjZhYWJkMDM5MGM0YWQ4YTYxZGVlYTliM2Rh
         | 
| 11 | 
            -
                ZGY1ZTdmMjY5NjNjZDA3MTMyMzY4MzU5YTZhNDEzNzNhMjJjNWY=
         | 
| 12 | 
            -
              data.tar.gz: !binary |-
         | 
| 13 | 
            -
                OGYwODM0MTk0NmQ0YjQxMjEzMTYzMWQ4MzUyYTQ2YjY1YWEwOTMxMjQzZDNl
         | 
| 14 | 
            -
                MWZmOWIxNjk3NzFhYTI1YjA4ZTc1NDhlMjM0MzQ0NzBjMTE4YjRlNjRjOTRm
         | 
| 15 | 
            -
                ZTZhMDc0YjQ4NDYzYzhmMmE2NGNhYjQxOTk4NDgzMTQxYTgwOTE=
         | 
| 2 | 
            +
            SHA1:
         | 
| 3 | 
            +
              metadata.gz: 41268cfa1a659bd6997ec10f67fbc3afd7d15f19
         | 
| 4 | 
            +
              data.tar.gz: f2d888c38878d69b06e7e9a057a58ffe175cf6af
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: 686135b818c4d6867fe4a005ce09862705ed9b9eac5ed309e5bdbcae8bc1cd04764f8cd8f49feb1db6eccd4002abb4bde65b72f0db15cec9d53a9a91dc4a22b9
         | 
| 7 | 
            +
              data.tar.gz: 1241dc30e1b38a16cc2371d5d0e5c43441022b700a1c1569508e220fb69c6ef8d7d05af3bc965593ca19bce078c4159773532cfef32089bf8ba7820763321aab
         | 
    
        data/lib/hebrew.rb
    CHANGED
    
    | @@ -3,19 +3,42 @@ | |
| 3 3 | 
             
            # @author Asaf Bartov <asaf.bartov@gmail.com>
         | 
| 4 4 | 
             
            #
         | 
| 5 5 |  | 
| 6 | 
            -
             | 
| 7 | 
            -
            NIKKUD_CP1255 = [ | 
| 8 | 
            -
             | 
| 6 | 
            +
            # codepoints for CP1255 nikkud
         | 
| 7 | 
            +
            NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 209, 210]
         | 
| 8 | 
            +
            #NIKKUD_CP1255 = ["\xc0".force_encoding('windows-1255'), "\xc1".force_encoding('windows-1255'), "\xc2".force_encoding('windows-1255'), "\xc3".force_encoding('windows-1255'), "\xc4".force_encoding('windows-1255'), "\xc5".force_encoding('windows-1255'), "\xc6".force_encoding('windows-1255'), "\xc7".force_encoding('windows-1255'), "\xc8".force_encoding('windows-1255'), "\xc9".force_encoding('windows-1255'), "\xcb".force_encoding('windows-1255'), "\xcc".force_encoding('windows-1255'), "\xd1".force_encoding('windows-1255'), "\xd2".force_encoding('windows-1255')] # wow, this is fugly.  Is there a neater way to specify CP1255 literal?
         | 
| 9 | 
            +
            NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05c1, 0x05c2]
         | 
| 10 | 
            +
            #NIKKUD_UTF8 = ["\u05b0", "\u05b1", "\u05b2", "\u05b3", "\u05b4", "\u05b5", "\u05b6", "\u05b7", "\u05b8", "\u05b9", "\u05bb", "\u05bc", "\u05c1", "\u05c2"]
         | 
| 9 11 | 
             
            # TODO: Mac encoding
         | 
| 10 12 |  | 
| 13 | 
            +
            FIANLS_CP1255 = ["\xea".force_encoding('windows-1255'), "\xed".force_encoding('windows-1255'), "\xef".force_encoding('windows-1255'), "\xf3".force_encoding('windows-1255'), "\xf5".force_encoding('windows-1255')]
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            FINALS_UTF8 = []
         | 
| 16 | 
            +
             | 
| 11 17 | 
             
            # extend String class
         | 
| 12 18 | 
             
            class String
         | 
| 13 19 | 
             
              # this will return the string, stripped of any Hebrew nikkud characters
         | 
| 14 20 | 
             
              def strip_nikkud
         | 
| 21 | 
            +
                case self.encoding
         | 
| 22 | 
            +
                when Encoding::UTF_8
         | 
| 23 | 
            +
                  strip_nikkud_utf8
         | 
| 24 | 
            +
                when Encoding::WINDOWS_1255 || Encoding::CP1255
         | 
| 25 | 
            +
                  strip_nikkud_cp1255
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
              def strip_nikkud_cp1255
         | 
| 29 | 
            +
                target = ''.force_encoding('windows-1255')
         | 
| 30 | 
            +
                self.each_codepoint {|cp|
         | 
| 31 | 
            +
                  unless self.class.is_codepoint_nikkud_cp1255(cp)
         | 
| 32 | 
            +
                    target += cp.chr(Encoding::CP1255) # is there a neater way?
         | 
| 33 | 
            +
                  end
         | 
| 34 | 
            +
                }
         | 
| 35 | 
            +
                return target
         | 
| 36 | 
            +
              end
         | 
| 37 | 
            +
              def strip_nikkud_utf8
         | 
| 15 38 | 
             
                target = ''
         | 
| 16 | 
            -
                self. | 
| 17 | 
            -
                  unless  | 
| 18 | 
            -
                    target +=  | 
| 39 | 
            +
                self.each_codepoint {|cp|
         | 
| 40 | 
            +
                  unless self.class.is_codepoint_nikkud_utf8(cp)
         | 
| 41 | 
            +
                    target += cp.chr(Encoding::UTF_8)
         | 
| 19 42 | 
             
                  end
         | 
| 20 43 | 
             
                }
         | 
| 21 44 | 
             
                return target
         | 
| @@ -24,6 +47,12 @@ class String | |
| 24 47 | 
             
              def is_nikkud(c)
         | 
| 25 48 | 
             
                self.class.is_nikkud_by_encoding(c, self.encoding) # delegate to class method based on instance encoding
         | 
| 26 49 | 
             
              end
         | 
| 50 | 
            +
              def self.is_codepoint_nikkud_cp1255(cp)
         | 
| 51 | 
            +
                NIKKUD_CP1255.include?(cp)
         | 
| 52 | 
            +
              end
         | 
| 53 | 
            +
              def self.is_codepoint_nikkud_utf8(cp)
         | 
| 54 | 
            +
                NIKKUD_UTF8.include?(cp)
         | 
| 55 | 
            +
              end
         | 
| 27 56 | 
             
              def self.is_nikkud_by_encoding(c, encoding)
         | 
| 28 57 | 
             
                case encoding
         | 
| 29 58 | 
             
                when Encoding::UTF_8
         | 
| @@ -35,4 +64,12 @@ class String | |
| 35 64 | 
             
                # TODO: add Mac encoding?
         | 
| 36 65 | 
             
                end
         | 
| 37 66 | 
             
              end
         | 
| 67 | 
            +
              def self.is_final_by_encoding(c, encoding)
         | 
| 68 | 
            +
                case encoding
         | 
| 69 | 
            +
                when Encoding::UTF_8
         | 
| 70 | 
            +
                  FIANLS_UTF8.include?(c)
         | 
| 71 | 
            +
                when Encoding::WINDOWS_1255 || Encoding::CP1255
         | 
| 72 | 
            +
                  FINALS_CP1255.include?(c)
         | 
| 73 | 
            +
                end
         | 
| 74 | 
            +
              end
         | 
| 38 75 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: hebrew
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.2
         | 
| 4 | 
            +
              version: 0.1.3
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Asaf Bartov
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2014-02-11 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description: Some useful code to identify, transcode, and manipulate Hebrew text
         | 
| 14 14 | 
             
            email: asaf.bartov@gmail.com
         | 
| @@ -27,17 +27,17 @@ require_paths: | |
| 27 27 | 
             
            - lib
         | 
| 28 28 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 29 29 | 
             
              requirements:
         | 
| 30 | 
            -
              - -  | 
| 30 | 
            +
              - - '>='
         | 
| 31 31 | 
             
                - !ruby/object:Gem::Version
         | 
| 32 32 | 
             
                  version: '0'
         | 
| 33 33 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 34 34 | 
             
              requirements:
         | 
| 35 | 
            -
              - -  | 
| 35 | 
            +
              - - '>='
         | 
| 36 36 | 
             
                - !ruby/object:Gem::Version
         | 
| 37 37 | 
             
                  version: '0'
         | 
| 38 38 | 
             
            requirements: []
         | 
| 39 39 | 
             
            rubyforge_project: 
         | 
| 40 | 
            -
            rubygems_version: 2. | 
| 40 | 
            +
            rubygems_version: 2.2.1
         | 
| 41 41 | 
             
            signing_key: 
         | 
| 42 42 | 
             
            specification_version: 4
         | 
| 43 43 | 
             
            summary: Hebrew string manipulation
         |