hebrew 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +15 -0
  2. data/lib/hebrew.rb +38 -0
  3. metadata +44 -0
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZGQ0NGU1ODllODliMzU4NmNhOTI4ZGJkMmU4ZjBlNjg2ZTY0NTE4YQ==
5
+ data.tar.gz: !binary |-
6
+ OWJiNjU1MTMyYTRmZjZmZWE5MDY1NWVmMWNlOTJkYTM1MTVkNjQ2Yg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MzcxYjMzODk1MWZlM2VkMWE4OGZjYjczNzQ4YmIyY2IzMzc2NzI3MDBjNGI1
10
+ OWFhZDFhYzdiZWExMTczNzAwNjZhYWJkMDM5MGM0YWQ4YTYxZGVlYTliM2Rh
11
+ ZGY1ZTdmMjY5NjNjZDA3MTMyMzY4MzU5YTZhNDEzNzNhMjJjNWY=
12
+ data.tar.gz: !binary |-
13
+ OGYwODM0MTk0NmQ0YjQxMjEzMTYzMWQ4MzUyYTQ2YjY1YWEwOTMxMjQzZDNl
14
+ MWZmOWIxNjk3NzFhYTI1YjA4ZTc1NDhlMjM0MzQ0NzBjMTE4YjRlNjRjOTRm
15
+ ZTZhMDc0YjQ4NDYzYzhmMmE2NGNhYjQxOTk4NDgzMTQxYTgwOTE=
data/lib/hebrew.rb ADDED
@@ -0,0 +1,38 @@
1
+ # Some useful Hebrew manipulation routines
2
+ #
3
+ # @author Asaf Bartov <asaf.bartov@gmail.com>
4
+ #
5
+
6
+
7
# Hebrew nikkud (vowel-point) marks as single-byte Windows-1255 (CP1255)
# strings. Entries are listed in the same order as NIKKUD_UTF8; per the
# Windows-1255 code page each byte here maps to the code point at the same
# index in that list.
#
# Built from raw byte values so that no string literal has to be re-tagged
# in place with force_encoding.
NIKKUD_CP1255 = [
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
  0xc7, 0xc8, 0xc9, 0xcb, 0xcc, 0xd1, 0xd2
].map { |byte| [byte].pack('C').force_encoding(Encoding::WINDOWS_1255).freeze }.freeze

# The same nikkud marks as UTF-8 strings:
# U+05B0..U+05B9, U+05BB, U+05BC, U+05C1 and U+05C2.
NIKKUD_UTF8 = [
  "\u05b0", "\u05b1", "\u05b2", "\u05b3", "\u05b4", "\u05b5", "\u05b6",
  "\u05b7", "\u05b8", "\u05b9", "\u05bb", "\u05bc", "\u05c1", "\u05c2"
].map(&:freeze).freeze
# TODO: Mac encoding

# Extends the core String class with Hebrew-specific helpers.
class String
  # Returns a copy of the string with every Hebrew nikkud character removed.
  #
  # The receiver is left untouched and the result always carries the
  # receiver's encoding, even when every remaining character is plain ASCII.
  # Strings in an encoding other than UTF-8 or Windows-1255 are copied
  # unchanged, because no nikkud table is defined for them.
  #
  # @return [String] a new string without nikkud characters
  def strip_nikkud
    # Seed the buffer with the receiver's encoding and append in place;
    # `+=` would allocate a fresh string for every character.
    stripped = String.new.force_encoding(encoding)
    each_char { |char| stripped << char unless is_nikkud(char) }
    stripped
  end

  # TODO: add strip_nikkud!

  # Tells whether +c+ is a nikkud character, judged by the encoding of the
  # receiver (not the encoding of +c+ itself).
  #
  # @param c [String] a single character
  # @return [Boolean]
  def is_nikkud(c)
    # Delegate to the class method based on the instance encoding.
    self.class.is_nikkud_by_encoding(c, encoding)
  end

  # Tells whether +c+ is a nikkud character in the given encoding.
  #
  # @param c [String] a single character
  # @param encoding [Encoding] the encoding whose nikkud table is consulted
  # @return [Boolean] false for encodings that have no nikkud table
  def self.is_nikkud_by_encoding(c, encoding)
    case encoding
    when Encoding::UTF_8
      NIKKUD_UTF8.include?(c)
    when Encoding::WINDOWS_1255
      # Encoding::CP1255 is an alias for this same Encoding object, so a
      # single branch covers both names.
      NIKKUD_CP1255.include?(c)
    else
      # TODO: add Mac encoding?
      false
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hebrew
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Asaf Bartov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-10 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Some useful code to identify, transcode, and manipulate Hebrew text
14
+ email: asaf.bartov@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/hebrew.rb
20
+ homepage: http://rubygems.org/gems/hebrew
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.0.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Hebrew string manipulation
44
+ test_files: []