persian 0.0.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.editorconfig +9 -0
- data/.gitignore +51 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.travis.yml +8 -0
- data/Gemfile +10 -0
- data/Rakefile +36 -0
- data/lib/persian/counter.rb +61 -0
- data/lib/persian/date.rb +150 -0
- data/lib/persian/dynamic.rb +38 -0
- data/lib/persian/list/alphabet.rb +107 -0
- data/lib/persian/list/character.rb +193 -0
- data/lib/persian/list/homonyms.rb +59 -0
- data/lib/persian/list/number.rb +168 -0
- data/lib/persian/num_text.rb +53 -0
- data/lib/persian/number.rb +81 -0
- data/lib/persian/text/keyboard.rb +22 -0
- data/lib/persian/text/text.rb +214 -0
- data/lib/persian/tokenizer.rb +56 -0
- data/lib/persian/unicode.rb +42 -0
- data/lib/persian/url.rb +25 -0
- data/lib/persian/version.rb +2 -1
- data/lib/persian.rb +16 -39
- data/persian.gemspec +26 -0
- data/readme.md +48 -0
- data/spec/counter_spec.rb +83 -0
- data/spec/dynamic_spec.rb +6 -0
- data/spec/num_text_spec.rb +17 -0
- data/spec/number_spec.rb +129 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text_spec.rb +258 -0
- data/spec/tokenizer_spec.rb +31 -0
- data/spec/unicode_spec.rb +25 -0
- data/spec/url_spec.rb +11 -0
- metadata +42 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ad07e72f8e952adecef3078d3132de48372e936682eaf67dfc77745d863d24d5
|
4
|
+
data.tar.gz: 618f5b540864a034b4fc5bae2865a1c6de8f0fff50fb12eb1c28e4a506062d06
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5dd769632abf8da06746802aedbcfc83a3bb49edfec68364f068c476454469b38ba8e91a0ea721026202d025714a88ee4a969d76ea928f88b0a48af134cb6f71
|
7
|
+
data.tar.gz: ad6bea9eee317516acfe91241137896f474877531ef34c4b69fe1e297b7ca346757e88b9baf7e67dc16b01b9ddc86994900ae1b442ac2325cf59cc909c955c9b
|
data/.editorconfig
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# for a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
# Gemfile.lock
|
46
|
+
# .ruby-version
|
47
|
+
# .ruby-gemset
|
48
|
+
|
49
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
50
|
+
.rvmrc
|
51
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
Metrics/LineLength:
|
2
|
+
Max: 300
|
3
|
+
|
4
|
+
Metrics/MethodLength:
|
5
|
+
Max: 75
|
6
|
+
|
7
|
+
Metrics/ClassLength:
|
8
|
+
Max: 2000
|
9
|
+
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 69
|
12
|
+
|
13
|
+
Metrics/CyclomaticComplexity:
|
14
|
+
Max: 14
|
15
|
+
|
16
|
+
Metrics/PerceivedComplexity:
|
17
|
+
Max: 18
|
18
|
+
|
19
|
+
Style/BracesAroundHashParameters:
|
20
|
+
EnforcedStyle: context_dependent
|
21
|
+
|
22
|
+
Style/WordArray:
|
23
|
+
EnforcedStyle: brackets
|
24
|
+
|
25
|
+
Style/AsciiComments:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
Style/MethodMissing:
|
29
|
+
Enabled: false
|
30
|
+
|
31
|
+
Style/ConstantName:
|
32
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Put the gem's lib/ directory on the load path so the version file can be
# required straight from the working tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require 'persian/version'

# Default task: build the .gem file, then install it.
desc 'Build and install persian gem'
task default: %i[build install]

desc 'Build persian gem'
task :build do
  sh 'gem build persian.gemspec'
end

# Installs the .gem file whose name matches the current Persian::VERSION.
desc 'Install builded lastest version of gem'
task :install do
  sh "gem install persian-#{Persian::VERSION}.gem"
end

desc 'Run Rspec specs'
task :rspec do
  sh 'rspec'
end

desc 'Check code style with rubocop'
task :rubocop do
  sh 'rubocop'
end

# Aggregate task: has no action of its own, it only runs its prerequisites.
desc 'Run code tests'
task test: %i[rspec rubocop]

desc 'Generate yard docs in doc/ directory'
task :doc do
  sh 'yard doc'
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian Module
|
4
|
+
module Persian
  # Persian counter class
  # Counting helpers for the characters, words and paragraphs of a text
  class Counter
    # Tally the characters of +text+.
    # With no +char+ the full Hash is returned (key: character, value: number
    # of occurrences; missing keys read as 0). With +char+ only the count of
    # that character is returned.
    def self.character(text, char = nil)
      tally = Hash.new(0)
      text.split(//).each { |letter| tally[letter] += 1 }
      return tally if char.nil?

      tally[char]
    end

    # Return how many distinct characters +text+ contains
    def self.uniq_character(text)
      text.split(//).uniq.size
    end

    # Return the total number of characters in +text+
    def self.character_counter(text)
      text.length
    end

    # Tally the tokens that Persian::Tokenizer.tokenize produces for +text+.
    # With no +keyword+ the full Hash is returned (key: word, value: number of
    # occurrences; missing keys read as 0). With +keyword+ only the count of
    # that word is returned.
    def self.word(text, keyword = nil)
      tally = Hash.new(0)
      Persian::Tokenizer.tokenize(text).each { |token| tally[token] += 1 }
      return tally if keyword.nil?

      tally[keyword]
    end

    # Return the number of paragraphs Persian::Tokenizer.split_paragraphs
    # finds in +text+
    def self.paragraph(text)
      Persian::Tokenizer.split_paragraphs(text).length
    end
  end
end
|
data/lib/persian/date.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian Module
|
4
|
+
module Persian
  # Persian date class
  # Converts dates between the Gregorian and Jalali calendars by going through
  # a Julian Day Number (JDN), and answers basic Jalali calendar questions
  # (leap year, month length, date validity).
  class Date
    # Jalali years at which the 33-year leap-year rule restarts.
    # The first entry is the lowest supported year, the last entry is the
    # first unsupported one.
    BREAKS = [-61, 9, 38, 199, 426, 686, 756, 818, 1111, 1181, 1210,
              1635, 2060, 2097, 2192, 2262, 2324, 2394, 2456, 3178].freeze

    # Convert a Gregorian date to Jalali.
    # Returns a Hash with the keys :jy, :jm and :jd.
    def to_jalali(gy, gm, gd)
      d2j(g2d(gy, gm, gd))
    end

    # Convert a Jalali date to Gregorian.
    # Returns a Hash with the keys :gy, :gm and :gd.
    def to_gregorian(jy, jm, jd)
      d2g(j2d(jy, jm, jd))
    end

    # True when the year is within -61..3177, the month within 1..12 and the
    # day within 1..length of that month.
    def valid_jalali_date?(jy, jm, jd)
      jy >= -61 && jy <= 3177 && jm >= 1 && jm <= 12 && jd >= 1 && jd <= jalali_month_length(jy, jm)
    end

    # True when jal_cal reports 0 years since the last leap year.
    def leap_jalali_year?(jy)
      jal_cal(jy)[:leap].zero?
    end

    # Number of days in month +jm+ of Jalali year +jy+:
    # 31 for months 1-6, 30 for months 7-11, 30 or 29 for month 12.
    def jalali_month_length(jy, jm)
      return 31 if jm <= 6
      return 30 if jm <= 11
      return 30 if leap_jalali_year?(jy)
      29
    end

    # Compute calendar data for Jalali year +jy+.
    # Returns a Hash with:
    #   :leap  - number of years since the last leap year (0 to 4)
    #   :gy    - Gregorian year in which the Jalali year begins
    #   :march - March day of 1 Farvardin (first day of the Jalali year)
    # Raises when +jy+ is outside the range covered by BREAKS.
    def jal_cal(jy)
      bl = BREAKS.size
      gy = jy + 621
      leap_j = -14
      jp = BREAKS[0]

      # NOTE(review): `Error` is not defined in this file; unless it is
      # declared elsewhere in the gem this line raises NameError - confirm.
      raise Error if jy < jp || jy >= BREAKS[bl - 1]

      jump = nil

      # Find the pair of break years that encloses jy, accumulating the leap
      # years of every fully passed period on the way.
      1.upto(bl - 1) do |i|
        jm = BREAKS[i]
        jump = jm - jp

        break if jy < jm

        leap_j = leap_j + div(jump, 33) * 8 + div(mod(jump, 33), 4)
        jp = jm
      end

      n = jy - jp

      # Leap years inside the current period. The "+ 3" rounds the partial
      # 4-year group up; without it 1 Farvardin lands one day early in the
      # years following a leap year.
      leap_j = leap_j + div(n, 33) * 8 + div(mod(n, 33) + 3, 4)

      leap_j += 1 if mod(jump, 33) == 4 && jump - n == 4

      # Same count for the Gregorian calendar, up to the year gy.
      leap_g = div(gy, 4) - div((div(gy, 100) + 1) * 3, 4) - 150

      march = 20 + leap_j - leap_g

      # Years passed since the last leap year.
      n = n - jump + div(jump + 4, 33) * 33 if jump - n < 6

      leap = mod(mod(n + 1, 33) - 1, 4)
      leap = 4 if leap == -1

      { leap: leap,
        gy: gy,
        march: march }
    end

    # Convert a Jalali date to a Julian Day Number.
    def j2d(jy, jm, jd)
      r = jal_cal(jy)
      g2d(r[:gy], 3, r[:march]) + (jm - 1) * 31 - div(jm, 7) * (jm - 7) + jd - 1
    end

    # Convert a Julian Day Number to a Jalali date.
    # Returns a Hash with the keys :jy, :jm and :jd.
    def d2j(jdn)
      gy = d2g(jdn)[:gy]
      jy = gy - 621
      r = jal_cal(jy)
      jdn1f = g2d(gy, 3, r[:march])

      # Days passed since 1 Farvardin of jy; negative means the date still
      # belongs to the previous Jalali year.
      k = jdn - jdn1f
      if k >= 0
        # The first six months have 31 days each (days 0..185).
        return { jy: jy, jm: 1 + div(k, 31), jd: mod(k, 31) + 1 } if k <= 185

        k -= 186
      else
        jy -= 1
        k += 179
        k += 1 if r[:leap] == 1
      end

      # The remaining months are counted in 30-day steps starting at month 7.
      { jy: jy,
        jm: 7 + div(k, 30),
        jd: mod(k, 30) + 1 }
    end

    # Convert a Gregorian date to a Julian Day Number.
    def g2d(gy, gm, gd)
      d = div((gy + div(gm - 8, 6) + 100_100) * 1_461, 4) + div(153 * mod(gm + 9, 12) + 2, 5) + gd - 34_840_408
      d - div(div(gy + 100_100 + div(gm - 8, 6), 100) * 3, 4) + 752
    end

    # Convert a Julian Day Number to a Gregorian date.
    # Returns a Hash with the keys :gy, :gm and :gd.
    def d2g(jdn)
      j = 4 * jdn + 139_361_631
      j = j + div(div(4 * jdn + 183_187_720, 146_097) * 3, 4) * 4 - 3_908
      i = div(mod(j, 1_461), 4) * 5 + 308
      gd = div(mod(i, 153), 5) + 1
      gm = mod(div(i, 153), 12) + 1
      gy = div(j, 1_461) - 100_100 + div(8 - gm, 6)

      { gy: gy,
        gm: gm,
        gd: gd }
    end

    # Division that truncates toward zero (Ruby's Integer#/ floors instead).
    def div(a, b)
      tilde(a / b.to_f)
    end

    # Remainder matching #div, so the result carries the sign of +a+.
    def mod(a, b)
      a - tilde(a / b.to_f) * b
    end

    # Drop the fractional part of +num+, rounding toward zero.
    def tilde(num)
      num.truncate
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian Dynamic methods
|
4
|
+
module Persian
  # Persian Text class :: Dynamic methods
  # Adds remove_<name> class methods: remove_<name>(text) passes +text+ and
  # the value of the Persian constant <NAME> to Persian.rm_char.
  class Text
    # Handle remove_<name> calls whose upper-cased <name> is a constant that
    # Persian can resolve; everything else falls through to the default
    # NoMethodError.
    def self.method_missing(method, *arg, &block)
      name = method.to_s
      return super unless name.start_with?('remove_')

      # Constant name is the method name without the remove_ prefix.
      char = name.sub(/\Aremove_/, '').upcase
      return super unless constant?(char)

      # NOTE(review): Persian.rm_char is not defined in this file - it is
      # expected to come from elsewhere in the gem.
      Persian.rm_char(arg[0], get_constant(char))
    end

    # Mirror method_missing: a remove_<name> method is only reported when
    # method_missing would actually handle it.
    def self.respond_to_missing?(method, include_private = false)
      name = method.to_s
      return true if name.start_with?('remove_') && constant?(name.sub(/\Aremove_/, '').upcase)

      super
    end

    # True when Persian can resolve the constant +const_name+.
    # Names that are not valid constant names (e.g. an empty string) make
    # const_defined? raise NameError; they are reported as false.
    def self.constant?(const_name)
      Persian.const_defined?(const_name)
    rescue NameError
      false
    end

    # Return the value of the Persian constant +const_name+.
    def self.get_constant(const_name)
      Persian.const_get(const_name)
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Persian
  # Named constants for Persian and Arabic letters, diacritics, punctuation,
  # math signs and the Persian, Arabic and Latin digits.
  module Alphabet
    # Persian letters
    ALEF = 'ا'.freeze
    ALEF_MAD = 'آ'.freeze
    BE = 'ب'.freeze
    PE = 'پ'.freeze
    TE = 'ت'.freeze
    THE = 'ث'.freeze
    JIM = 'ج'.freeze
    CHE = 'چ'.freeze
    HE_JIMI = 'ح'.freeze
    KHE = 'خ'.freeze
    DAL = 'د'.freeze
    ZAL = 'ذ'.freeze
    RE = 'ر'.freeze
    ZE = 'ز'.freeze
    ZHE = 'ژ'.freeze
    SIN = 'س'.freeze
    SHIN = 'ش'.freeze
    SAD = 'ص'.freeze
    ZAD = 'ض'.freeze
    TA = 'ط'.freeze
    ZA = 'ظ'.freeze
    EIN = 'ع'.freeze
    GHEIN = 'غ'.freeze
    FE = 'ف'.freeze
    QAF = 'ق'.freeze
    KAF = 'ک'.freeze
    GAF = 'گ'.freeze
    LAM = 'ل'.freeze
    MIM = 'م'.freeze
    NOON = 'ن'.freeze
    VAV = 'و'.freeze
    HE_DOCHESHM = 'ه'.freeze
    YE = 'ی'.freeze

    # Arabic variants of kaf and ye
    KAF_ARABIC = 'ك'.freeze
    YE_ARABIC = 'ي'.freeze

    # Diacritics (combining marks)
    MAD = 'ٓ'.freeze

    AA = 'َ'.freeze
    EE = 'ِ'.freeze
    OO = 'ُ'.freeze

    AN = 'ً'.freeze
    EN = 'ٍ'.freeze
    ON = 'ٌ'.freeze

    SAKEN = 'ْ'.freeze
    TASHDID = 'ّ'.freeze

    # Spacing and joining controls. The zero-width characters are written as
    # escapes because the raw characters are invisible in source and easily
    # lost by editors and copy/paste.
    SPACE = ' '.freeze
    ZWNJ = "\u200C".freeze # ZERO WIDTH NON-JOINER
    ZWJ = "\u200D".freeze # ZERO WIDTH JOINER

    # Punctuation
    NOGHTE = '.'.freeze
    VIRGOOL = '،'.freeze
    DONOGHTE = ':'.freeze
    NOGHTEVIRGOOL = '؛'.freeze

    TAAJOB = '!'.freeze
    SOAL = '؟'.freeze

    # Math signs
    BEALAVE = '+'.freeze
    DARSAD = '٪'.freeze
    MENHA = '-'.freeze
    MOSAVI = '='.freeze
    TAGHSIM = '÷'.freeze
    ZARBDAR = '×'.freeze

    # Kashida (tatweel)
    KESH = 'ـ'.freeze

    # Persian digits
    SEFR = '۰'.freeze
    YEK = '۱'.freeze
    DOW = '۲'.freeze
    SE = '۳'.freeze
    CHAHAR = '۴'.freeze
    PANJ = '۵'.freeze
    SHESH = '۶'.freeze
    HAFT = '۷'.freeze
    HASHT = '۸'.freeze
    NOH = '۹'.freeze

    # Arabic digits
    SIFR = '٠'.freeze
    WAHID = '١'.freeze
    ATHNAN = '٢'.freeze
    THALETH = '٣'.freeze
    ARBE = '٤'.freeze
    KHAMSE = '٥'.freeze
    SETE = '٦'.freeze
    SABE = '٧'.freeze
    THMANY = '٨'.freeze
    LAYS = '٩'.freeze

    # Latin digits
    ZERO = '0'.freeze
    ONE = '1'.freeze
    TWO = '2'.freeze
    THREE = '3'.freeze
    FOUR = '4'.freeze
    FIVE = '5'.freeze
    SIX = '6'.freeze
    SEVEN = '7'.freeze
    EIGHT = '8'.freeze
    NINE = '9'.freeze
  end
end
|