hebrew 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +5 -5
  2. data/lib/hebrew.rb +26 -2
  3. metadata +3 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 3d90717862d817405afdf88fbdc31eefbcc90370
4
- data.tar.gz: 43f84e5a282a698eee70581b68ca96e5c4dc368d
2
+ SHA256:
3
+ metadata.gz: '0801a90c50ccf260e20daa15b9c9691b18fc3dc6908a2221614f4c1ab6a3f453'
4
+ data.tar.gz: 9b4309ede2544c914b134cfdd06c81e806d2908ba41105f7799f7e3d3e25bec3
5
5
  SHA512:
6
- metadata.gz: 4c13beabf918c30be2deae92c2cf5e5949c44e8e2a0f7689caa28b2851626d239e835510cdc7c4141aaa5ff22e875f90b5199cb7670f003ed690a642aaf3f8d7
7
- data.tar.gz: 7f9fd7db90327bad58d00f8fdaae4f03f26fe0cef099ab02115585581f79fb708d5326c01436028afd9beb69d50f7d92d04bb13248d056b666135e4038bc036d
6
+ metadata.gz: 2e6885a12245311fa51f8610fe43c8903763251bf718b2db053c9b97deb741f64b83f5f5245dcd3a56fbcb668e614b5e284f41f287ef2278f9dc347c1f6f8ea0
7
+ data.tar.gz: 44dab01c3339147f11ad69567a6087d3f48680c44b226e03839a14d39c36c19fd3a39d0c2de9a0ea9ad2dd290cd63a740c0b3cec7d9f59b69a7eccf2b1d25fbd
data/lib/hebrew.rb CHANGED
@@ -5,9 +5,7 @@
5
5
 
6
6
  # codepoints for CP1255 nikkud
7
7
  NIKKUD_CP1255 = [192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 209, 210]
8
- #NIKKUD_CP1255 = ["\xc0".force_encoding('windows-1255'), "\xc1".force_encoding('windows-1255'), "\xc2".force_encoding('windows-1255'), "\xc3".force_encoding('windows-1255'), "\xc4".force_encoding('windows-1255'), "\xc5".force_encoding('windows-1255'), "\xc6".force_encoding('windows-1255'), "\xc7".force_encoding('windows-1255'), "\xc8".force_encoding('windows-1255'), "\xc9".force_encoding('windows-1255'), "\xcb".force_encoding('windows-1255'), "\xcc".force_encoding('windows-1255'), "\xd1".force_encoding('windows-1255'), "\xd2".force_encoding('windows-1255')] # wow, this is fugly. Is there a neater way to specify CP1255 literal?
9
8
  NIKKUD_UTF8 = [0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 0x05bd, 0x05bf, 0x05c1, 0x05c2]
10
- #NIKKUD_UTF8 = ["\u05b0", "\u05b1", "\u05b2", "\u05b3", "\u05b4", "\u05b5", "\u05b6", "\u05b7", "\u05b8", "\u05b9", "\u05bb", "\u05bc", "\u05c1", "\u05c2"]
11
9
  # TODO: Mac encoding
12
10
 
13
11
  FIANLS_CP1255 = ["\xea".force_encoding('windows-1255'), "\xed".force_encoding('windows-1255'), "\xef".force_encoding('windows-1255'), "\xf3".force_encoding('windows-1255'), "\xf5".force_encoding('windows-1255')]
@@ -18,6 +16,32 @@ HEB_UTF8_END = 1535
18
16
 
19
17
  # extend String class
20
18
  class String
19
# Dispatches to the encoding-specific Hebrew-stripping method for this string.
#
# UTF-8 strings are handled by strip_hebrew_utf8 and windows-1255 (CP1255)
# strings by strip_hebrew_cp1255; whatever those methods return is returned.
# For any other encoding no branch matches and the result is nil.
def strip_hebrew
  case encoding
  when Encoding::UTF_8
    strip_hebrew_utf8
  # A `when` clause takes a comma-separated list of candidates. Writing
  # `A || B` would collapse to just `A` before the comparison is made.
  when Encoding::WINDOWS_1255, Encoding::CP1255
    strip_hebrew_cp1255
  end
end
27
# Builds a new string from this string's codepoints, leaving out every
# codepoint for which String.is_codepoint_nikkud_utf8 or
# #is_hebrew_codepoint_utf8 returns a truthy value. The receiver is not modified.
def strip_hebrew_utf8
  each_codepoint.with_object(''.dup) do |codepoint, kept|
    hebrew = self.class.is_codepoint_nikkud_utf8(codepoint) || self.is_hebrew_codepoint_utf8(codepoint)
    kept << codepoint.chr(Encoding::UTF_8) unless hebrew
  end
end
36
# Builds a new windows-1255 string from this string's codepoints, leaving out
# every codepoint for which String.is_codepoint_nikkud_cp1255 or
# #is_hebrew_codepoint_cp1255 returns a truthy value. The receiver is not modified.
def strip_hebrew_cp1255
  kept = String.new.force_encoding('windows-1255')
  each_codepoint do |codepoint|
    next if self.class.is_codepoint_nikkud_cp1255(codepoint) || self.is_hebrew_codepoint_cp1255(codepoint)

    kept << codepoint.chr(Encoding::CP1255)
  end
  kept
end
21
45
  # this will return the string, stripped of any Hebrew nikkud characters
22
46
  def strip_nikkud
23
47
  case self.encoding
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hebrew
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Asaf Bartov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-15 00:00:00.000000000 Z
11
+ date: 2019-09-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Some useful code to identify, transcode, and manipulate Hebrew text
14
14
  email: asaf.bartov@gmail.com
@@ -37,7 +37,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
37
37
  version: '0'
38
38
  requirements: []
39
39
  rubyforge_project:
40
- rubygems_version: 2.6.14
40
+ rubygems_version: 2.7.7
41
41
  signing_key:
42
42
  specification_version: 4
43
43
  summary: Hebrew string manipulation