hebrew 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hebrew.rb +23 -0
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2cc64efe3ce4523383c76a9265c8e0d015f847c1aaefacb903470ec9c295f342
|
4
|
+
data.tar.gz: e0f065db2c27e3dd0b47669abc629eb228e4f37b118db54afa128203d832e8e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f5143397a2bed96cccc2aead3dee8aac7f7f28637819e1979b61cf37e1bb96712b29e6a7f1f218c6281b4bdda4e67435ce2a5f8d0195c7982bc088adfcdb9fb4
|
7
|
+
data.tar.gz: 8ec1ba901eb082f53f1d8caa1049a5e48302418c48867b11e67e945ef50ef42112e735114fae64df1e0adf0a66296de188ab59cca419779a8d650afc8026aab9
|
data/lib/hebrew.rb
CHANGED
@@ -13,6 +13,10 @@ FIANLS_CP1255 = ["\xea".force_encoding('windows-1255'), "\xed".force_encoding('w
|
|
13
13
|
FINALS_UTF8 = ["\u05da", "\u05dd", "\u05df", "\u05e3", "\u05e5"]
|
14
14
|
HEB_UTF8_START = 1424
|
15
15
|
HEB_UTF8_END = 1535
|
16
|
+
HEB_UTF8_XIRIK = 1460
|
17
|
+
HEB_UTF8_XOLAM = 1465
|
18
|
+
HEB_UTF8_QUBBUTS = 1467
|
19
|
+
HEB_UTF8_SHURUK = 1468
|
16
20
|
|
17
21
|
# extend String class
|
18
22
|
class String
|
@@ -87,6 +91,25 @@ class String
|
|
87
91
|
false
|
88
92
|
end
|
89
93
|
|
94
|
+
# this will add matres lectionis (yods and vavs as vowels) after diacritics that denote those vowels. The result won't always be morphologically correct Hebrew, but is useful for generating mostly-likely variants users may search for, when typing inputs (almost no Hebrew users know how to produce diacritics on the keyboard).
|
95
|
+
def naive_full_nikkud
|
96
|
+
ret = ''
|
97
|
+
prev_char = nil
|
98
|
+
case self.encoding
|
99
|
+
when Encoding::UTF_8
|
100
|
+
self.each_char do |c|
|
101
|
+
ret += c
|
102
|
+
ret += 'י' if c.codepoints[0] == HEB_UTF8_XIRIK
|
103
|
+
ret += 'ו' if c.codepoints[0] == HEB_UTF8_QUBBUTS
|
104
|
+
ret += 'ו' if [HEB_UTF8_XOLAM, HEB_UTF8_SHURUK].include?(c.codepoints[0]) && prev_char != 'ו'
|
105
|
+
prev_char = c
|
106
|
+
end
|
107
|
+
return ret.gsub('יי','ִי') # get rid of extraneous yods possibly added because we weren't looking ahead
|
108
|
+
else
|
109
|
+
return nil # not implemented for other encodings for now.
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
90
113
|
def any_nikkud?
|
91
114
|
func = case self.encoding
|
92
115
|
when Encoding::UTF_8
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hebrew
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Asaf Bartov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Some useful code to identify, transcode, and manipulate Hebrew text
|
14
14
|
email: asaf.bartov@gmail.com
|
@@ -36,8 +36,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
36
36
|
- !ruby/object:Gem::Version
|
37
37
|
version: '0'
|
38
38
|
requirements: []
|
39
|
-
|
40
|
-
rubygems_version: 2.7.7
|
39
|
+
rubygems_version: 3.1.2
|
41
40
|
signing_key:
|
42
41
|
specification_version: 4
|
43
42
|
summary: Hebrew string manipulation
|