hebrew 0.1.5 → 0.1.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/hebrew.rb +21 -9
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cdc23d69ac6c99a5089730a3980986821a750c9b
4
- data.tar.gz: b436b60e4093251483a256677308733488108d95
3
+ metadata.gz: db796d5a2e993a3e80c0533e59be71ddea35ff77
4
+ data.tar.gz: 907c43cec960cd4080f81084d555e627174e71cc
5
5
  SHA512:
6
- metadata.gz: 4bebdf8bb10c1101b93811a36d2b21ac12c76068040e02073e35bd5570b084c802c8cc4ecf0bbf23bc5663bb00c8a3ecfe67b9db9d1b9adc2ca124534c88e5b8
7
- data.tar.gz: c66e39755a562a5ac674deba0d678c5e115a74fbb16e12d2fdb299deeab608433168a5460d62f5e8e9401510e3f1735e97adf3bb7c89a7dc382e7717547e9b39
6
+ metadata.gz: c3d0e503b1ad747277dd6aa9ed98fcdf1ea32838bfd396f00657fb834f6715454d81a8bd23f66c0196085f62b763e6f2504c1bf0765f1f685958db92974e1851
7
+ data.tar.gz: 3c5221280f64cc1338f10ecf32fe73f3ce2bed78963d67c575179eb286eb64d7eed1e72c7bb84da2d89f75b0f55be8486f06cbd5d45346017e0e7d93f53ca595
data/lib/hebrew.rb CHANGED
@@ -4,17 +4,18 @@
4
4
  #
5
5
 
6
6
  # codepoints for CP1255 nikkud
7
- NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 209, 210]
7
+ NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 209, 210]
8
8
  #NIKKUD_CP1255 = ["\xc0".force_encoding('windows-1255'), "\xc1".force_encoding('windows-1255'), "\xc2".force_encoding('windows-1255'), "\xc3".force_encoding('windows-1255'), "\xc4".force_encoding('windows-1255'), "\xc5".force_encoding('windows-1255'), "\xc6".force_encoding('windows-1255'), "\xc7".force_encoding('windows-1255'), "\xc8".force_encoding('windows-1255'), "\xc9".force_encoding('windows-1255'), "\xcb".force_encoding('windows-1255'), "\xcc".force_encoding('windows-1255'), "\xd1".force_encoding('windows-1255'), "\xd2".force_encoding('windows-1255')] # wow, this is fugly. Is there a neater way to specify CP1255 literal?
9
- NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05c1, 0x05c2]
9
+ NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05bd, 0x05bf, 0x05c1, 0x05c2]
10
10
  #NIKKUD_UTF8 = ["\u05b0", "\u05b1", "\u05b2", "\u05b3", "\u05b4", "\u05b5", "\u05b6", "\u05b7", "\u05b8", "\u05b9", "\u05bb", "\u05bc", "\u05c1", "\u05c2"]
11
11
  # TODO: Mac encoding
12
12
 
13
13
  FIANLS_CP1255 = ["\xea".force_encoding('windows-1255'), "\xed".force_encoding('windows-1255'), "\xef".force_encoding('windows-1255'), "\xf3".force_encoding('windows-1255'), "\xf5".force_encoding('windows-1255')]
14
14
 
15
- FINALS_UTF8 = []
15
+ FINALS_UTF8 = ["\u05da", "\u05dd", "\u05df", "\u05e3", "\u05e5"]
16
16
  HEB_UTF8_START = 1424
17
17
  HEB_UTF8_END = 1535
18
+
18
19
  # extend String class
19
20
  class String
20
21
  # this will return the string, stripped of any Hebrew nikkud characters
@@ -44,6 +45,7 @@ class String
44
45
  }
45
46
  return target
46
47
  end
48
+ # this will return true if the string contains any Hebrew character (short circuit)
47
49
  def any_hebrew?
48
50
  case self.encoding
49
51
  when Encoding::UTF_8
@@ -56,6 +58,10 @@ class String
56
58
  return false
57
59
  end
58
60
  end
61
+ def is_hebrew_codepoint_cp1255(cp)
62
+ if (cp > 191 && cp < 202) or [203, 204, 209, 210].include?(cp)
63
+
64
+ end
59
65
  def is_hebrew_codepoint_utf8(cp)
60
66
  if cp >= HEB_UTF8_START && cp <= HEB_UTF8_END
61
67
  return true
@@ -63,27 +69,33 @@ class String
63
69
  return false
64
70
  end
65
71
  end
72
+
66
73
  # TODO: add strip_nikkud!
74
+
75
+ # this will return true if the parameter is a nikkud character
67
76
  def is_nikkud(c)
68
77
  self.class.is_nikkud_by_encoding(c, self.encoding) # delegate to class method based on instance encoding
69
78
  end
79
+
70
80
  def self.is_codepoint_nikkud_cp1255(cp)
71
- NIKKUD_CP1255.include?(cp)
81
+ return (cp > 191 && cp < 205) or [209, 210].include?(cp)
82
+ #NIKKUD_CP1255.include?(cp) # cleaner, but much slower
72
83
  end
73
84
  def self.is_codepoint_nikkud_utf8(cp)
74
- NIKKUD_UTF8.include?(cp)
85
+ return (cp > 0x05af && cp < 0x05ba) or [0x05bb, 0x05bc, 0x05c1, 0x05c2].include?(cp)
86
+ #NIKKUD_UTF8.include?(cp) # cleaner, but much slower
75
87
  end
88
+ # this will return true if the first parameter is a nikkud character in the encoding of the second parameter
76
89
  def self.is_nikkud_by_encoding(c, encoding)
77
90
  case encoding
78
91
  when Encoding::UTF_8
79
- # DBG: puts "utf8 - #{c} - #{c.codepoints.first}"
80
- NIKKUD_UTF8.include?(c)
92
+ self.is_codepoint_nikkud_utf8(c.codepoints.first)
81
93
  when Encoding::WINDOWS_1255 || Encoding::CP1255
82
- # DBG: puts "cp1255 - #{c} - #{c.codepoints.first}"
83
- NIKKUD_CP1255.include?(c)
94
+ self.is_codepoint_nikkud_cp1255(c.codepoints.first)
84
95
  # TODO: add Mac encoding?
85
96
  end
86
97
  end
98
+ # this will return true if the first parameter is a final letter in the encoding of the second parameter
87
99
  def self.is_final_by_encoding(c, encoding)
88
100
  case encoding
89
101
  when Encoding::UTF_8
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hebrew
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-04-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Some useful code to identify, transcode, and manipulate Hebrew text
14
14
  email: asaf.bartov@gmail.com
@@ -17,7 +17,7 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - lib/hebrew.rb
20
- homepage: http://rubygems.org/gems/hebrew
20
+ homepage: https://github.com/abartov/hebrew
21
21
  licenses:
22
22
  - MIT
23
23
  metadata: {}