persian 0.0.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.editorconfig +9 -0
- data/.gitignore +51 -0
- data/.rspec +3 -0
- data/.rubocop.yml +32 -0
- data/.travis.yml +8 -0
- data/Gemfile +10 -0
- data/Rakefile +36 -0
- data/lib/persian/counter.rb +61 -0
- data/lib/persian/date.rb +150 -0
- data/lib/persian/dynamic.rb +38 -0
- data/lib/persian/list/alphabet.rb +107 -0
- data/lib/persian/list/character.rb +193 -0
- data/lib/persian/list/homonyms.rb +59 -0
- data/lib/persian/list/number.rb +168 -0
- data/lib/persian/num_text.rb +53 -0
- data/lib/persian/number.rb +81 -0
- data/lib/persian/text/keyboard.rb +22 -0
- data/lib/persian/text/text.rb +214 -0
- data/lib/persian/tokenizer.rb +56 -0
- data/lib/persian/unicode.rb +42 -0
- data/lib/persian/url.rb +25 -0
- data/lib/persian/version.rb +2 -1
- data/lib/persian.rb +16 -39
- data/persian.gemspec +26 -0
- data/readme.md +48 -0
- data/spec/counter_spec.rb +83 -0
- data/spec/dynamic_spec.rb +6 -0
- data/spec/num_text_spec.rb +17 -0
- data/spec/number_spec.rb +129 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/text_spec.rb +258 -0
- data/spec/tokenizer_spec.rb +31 -0
- data/spec/unicode_spec.rb +25 -0
- data/spec/url_spec.rb +11 -0
- metadata +42 -12
@@ -0,0 +1,214 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian module
|
4
|
+
module Persian
  # Normalisation helpers for Persian text.
  #
  # Every helper is a class method that takes a String and returns the cleaned
  # String. The argument is never modified in place, so frozen strings and
  # string literals can be passed safely.
  #
  # AR_FA_CHAR, HAREKATS, BRACKETS, SIGNS and END_VOWEL are not defined in this
  # class; presumably they come from the persian/list/* files -- TODO confirm.
  class Text
    # Zero-width non-joiner (U+200C). Written as an escape so the invisible
    # character cannot be lost by editors, diffs or copy/paste.
    ZERO_WIDTH_NON_JOINER = "\u200c".freeze

    # Quote/bracket pairs that general_brackets rewrites, in the order applied.
    GENERAL_BRACKET_PATTERNS = [/"(.*?)"/, /\[(.*?)\]/, /\{(.*?)\}/, /\((.*?)\)/].freeze

    # Replace Arabic characters with Persian characters by applying every
    # (from, to) pair of AR_FA_CHAR in turn.
    def self.character(text)
      AR_FA_CHAR.reduce(text) { |result, (arabic, persian)| result.gsub(arabic, persian) }
    end

    # Collapse every run of whitespace to a single space and trim both ends.
    #
    # NOTE(review): the previous version had a second pass that split and
    # re-joined on an empty string, which returns the text unchanged. If a
    # zero-width non-joiner was meant there, confirm the intended behaviour.
    def self.remove_extra_spaces(text)
      text.split.join(' ')
    end

    # Remove every entry of HAREKATS (Arabic diacritics) from the text.
    def self.remove_harekats(text)
      HAREKATS.reduce(text) { |result, harekat| result.gsub(harekat, '') }
    end

    # Remove every entry of BRACKETS from the text.
    def self.remove_brackets(text)
      BRACKETS.reduce(text) { |result, bracket| result.gsub(bracket, '') }
    end

    # Replace every entry of SIGNS with +with+ (removed by default).
    # Returns an empty string when +text+ is nil.
    def self.remove_signs(text, with = '')
      return '' if text.nil?

      SIGNS.reduce(text) { |result, sign| result.gsub(sign, with) }
    end

    # Replace each zero-width non-joiner with a regular space.
    def self.replace_zwnj_with_space(text)
      text.gsub(ZERO_WIDTH_NON_JOINER, ' ')
    end

    # Replace general brackets with one type of brackets.
    # Rewrites "..", [..], {..} and (..) pairs, one kind after the other.
    # Default: 0xAB & 0xBB
    def self.general_brackets(text, left = '«', right = '»')
      GENERAL_BRACKET_PATTERNS.reduce(text) do |result, pattern|
        # Block form keeps +left+/+right+ literal even if they contain backslashes.
        result.gsub(pattern) { "#{left}#{Regexp.last_match(1)}#{right}" }
      end
    end

    # Add 'ی' after names that end with one of the END_VOWEL characters.
    def self.fix_y_after_vowel(text)
      return text if text.empty?
      return text unless END_VOWEL.include?(text[-1])

      text + 'ی'
    end

    # Replace the space after می and نمی with a zero-width non-joiner when the
    # prefix starts the text or follows whitespace.
    def self.replace_zwnj_mi(text)
      text.gsub(/(^|\s)(می|نمی)\s(\S+)/, "\\1\\2#{ZERO_WIDTH_NON_JOINER}\\3")
    end

    # Contract "ا است" to "است": when است follows a word ending in ا, drop the
    # whitespace and the leading ا of است.
    def self.ast(text)
      text.gsub(/(ا)\sاست/, '\1ست')
    end

    # Remove keshide (tatweel) from text.
    def self.keshide(text)
      text.gsub(/ـ+/, '')
    end

    # Use ی instead of ئ if next char is ی
    # Example پائیز => پاییز
    def self.replace_e_y(text)
      text.gsub(/ئ(ی)/, '\1\1')
    end

    # Replace a run of three or more dots with a single ellipsis character.
    def self.three_dots(text)
      text.gsub(/\.{3,}/, '…')
    end

    # Attach the detached suffixes تر, تری, ترین, ها and های to the preceding
    # word with a zero-width non-joiner.
    #
    # A suffix is only attached when it is a whole token (followed by
    # whitespace or the end of the text), so words that merely start with
    # these letters are left alone and the suffix itself is never dropped.
    #
    # NOTE(review): joining with a zero-width non-joiner is inferred from the
    # sibling ZWNJ helpers -- confirm against the project's specs.
    def self.suffix(text)
      text.gsub(/\s+(تر(?:ین?)?|ها(?:ی)?)(?=\s|\z)/, "#{ZERO_WIDTH_NON_JOINER}\\1")
    end

    # Collapse repeated Persian question marks into one.
    def self.remove_extra_question_mark(text)
      text.gsub(/(؟){2,}/, '\1')
    end

    # Insert a zero-width non-joiner after the first +point+ characters.
    # The text is returned unchanged when it has +point+ characters or fewer.
    def self.add_zwnj(text, point)
      text.scan(/^.{#{point}}|.+/).join(ZERO_WIDTH_NON_JOINER)
    end

    # Collapse a run of question marks followed by a run of exclamation marks
    # to a single '؟!'.
    def self.remove_question_exclamation(text)
      text.gsub(/(؟)+(!)+/, '\1\2')
    end

    # Drop common Persian stop words and join the remaining words with spaces.
    def self.remove_stopwords(text)
      stopwords = ['و', 'در', 'به', 'این', 'با', 'از', 'که', 'است', 'را']
      text.scan(/\S+/).reject { |word| stopwords.include?(word) }.join(' ')
    end

    # Remove whitespace that precedes a Persian semicolon.
    def self.remove_space_noghtevirgool(text)
      text.gsub(/\s+(؛)/, '\1')
    end

    # Remove punctuation that directly follows a Persian semicolon.
    def self.remove_signs_after_noghtevirgool(text)
      text.gsub(/(؛)[.،؛:!؟\-…]+/, '\1')
    end

    # Ensure a space between a Persian semicolon and the next visible character.
    def self.space_after_noghtevirgool(text)
      text.gsub(/(؛)(\S)/, '\1 \2')
    end

    # Replace a Persian semicolon at the end of a line with a full stop.
    def self.remove_noghtevirgool_para_end(text)
      text.gsub(/؛(\n|$)/, '.\1')
    end

    # Remove a Persian semicolon (and the spaces before it) that directly
    # follows an opening bracket.
    def self.remove_noghtevirgool_baz_start(text)
      text.gsub(/([(\[«]) *؛/, '\1')
    end

    # Remove whitespace that precedes a Persian comma.
    def self.remove_space_before_virgool(text)
      text.gsub(/\s+(،)/, '\1')
    end

    # Remove punctuation that follows a Persian comma, keeping any spaces
    # between them. A dot that starts a run of dots is left untouched.
    def self.remove_signs_after_virgool(text)
      pattern = /(،)([ ]+)?([،؛:!؟\-][\.،؛:!؟\-]*|\.(?!\.))/
      text.gsub(pattern, '\1\2')
    end

    # Ensure a space between a Persian comma and the next visible character.
    def self.space_after_virgool(text)
      text.gsub(/(،)(\S)/, '\1 \2')
    end

    # Remove every occurrence of +char+ from the text.
    # A String is matched literally (regex metacharacters have no special
    # meaning); a Regexp is used as given.
    def self.rm_char(text, char)
      pattern = char.is_a?(Regexp) ? char : char.to_s
      text.gsub(pattern, '')
    end

    # Replace a Persian comma at the end of a line with a full stop, keeping
    # the spaces/newlines that followed it.
    def self.rm_virgool_in_end(text)
      text.gsub(/(،)([ \n]+)?$/, '.\2')
    end

    # Ensure a space between a dot and the next visible character.
    def self.space_after_dot(text)
      text.gsub(/(\.)(\S)/, '\1 \2')
    end

    # Collapse runs of the same character into a single character.
    def self.squeeze(text)
      text.squeeze
    end

    # Remove specific character from end of text
    # Example: remove_postfix('پسره','ه')
    def self.remove_postfix(text, postfix)
      text.chomp(postfix)
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian module
|
4
|
+
module Persian
  # Basic tokenizer for Persian text: words, word n-grams and paragraphs.
  class Tokenizer
    # Characters that are split off from the words they touch.
    SYMBOLS = ['!', '﷼', ':', '؛', '؟', '،', '-', '.'].freeze
    OPENING_PAIRS = ['(', '{', '«', '<', '['].freeze
    CLOSING_PAIRS = [')', '}', '»', '>', ']'].freeze
    QUOTES = ["'", '"'].freeze
    DELIMITERS = (SYMBOLS + OPENING_PAIRS + CLOSING_PAIRS + QUOTES).freeze

    # Matches the first run of characters that are not delimiters.
    WORD_PATTERN = /[^#{Regexp.escape(DELIMITERS.join)}]+/

    # Basic persian word tokenizer
    # Return an array of words
    #
    # The text is split on whitespace; each chunk is then cut into up to three
    # tokens: the delimiters before its first word, that word, and whatever
    # follows it. Returns [''] when the text contains no chunks at all.
    def self.tokenize(text)
      chunks = text.split(/\s/)
      return [''] if chunks.empty?

      chunks.flat_map { |chunk| chunk.partition(WORD_PATTERN).reject(&:empty?) }
    end

    # Return every run of +num+ consecutive tokens, each joined with a space.
    # Returns an empty array when +num+ is smaller than 1 or larger than the
    # number of tokens.
    def self.tokenize_more(text, num)
      return [] if num < 1

      tokenize(text).each_cons(num).map { |group| group.join(' ').strip }
    end

    # Split paragraphs
    # Return an array of paragraphs
    #
    # Lines are separated by "\n" or "\r\n"; blank and whitespace-only lines
    # are not paragraphs.
    def self.split_paragraphs(text)
      text.split(/\r?\n/).reject { |line| line.strip.empty? }
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian module
|
4
|
+
module Persian
  # Helpers for Unicode bidirectional formatting characters.
  class Unicode
    LEFT_TO_RIGHT_EMBEDDING = 0x202A
    RIGHT_TO_LEFT_EMBEDDING = 0x202B
    POP_DIRECTIONAL_FORMATTING = 0x202C
    LEFT_TO_RIGHT_OVERRIDE = 0x202D
    RIGHT_TO_LEFT_OVERRIDE = 0x202E

    # Convert a code point to its UTF-8 character.
    # +char+ is either an Integer code point or a hexadecimal String such as
    # '202B'.
    def self.codepoint_to_char(char)
      # Integer rather than Fixnum: Fixnum no longer exists in current Ruby.
      codepoint = char.is_a?(Integer) ? char : char.hex
      [codepoint].pack('U')
    end

    # Return text between RIGHT-TO-LEFT EMBEDDING (U+202B) and
    # POP DIRECTIONAL FORMATTING (U+202C).
    def self.rle(text)
      wrap(RIGHT_TO_LEFT_EMBEDDING, text)
    end

    # Return text between LEFT-TO-RIGHT EMBEDDING (U+202A) and
    # POP DIRECTIONAL FORMATTING (U+202C).
    def self.lre(text)
      wrap(LEFT_TO_RIGHT_EMBEDDING, text)
    end

    # Return text between RIGHT-TO-LEFT OVERRIDE (U+202E) and
    # POP DIRECTIONAL FORMATTING (U+202C).
    def self.rlo(text)
      wrap(RIGHT_TO_LEFT_OVERRIDE, text)
    end

    # Return text between LEFT-TO-RIGHT OVERRIDE (U+202D) and
    # POP DIRECTIONAL FORMATTING (U+202C).
    def self.lro(text)
      wrap(LEFT_TO_RIGHT_OVERRIDE, text)
    end

    # Surround +text+ with the given opening code point and the pop character.
    def self.wrap(opening_codepoint, text)
      codepoint_to_char(opening_codepoint) + text + codepoint_to_char(POP_DIRECTIONAL_FORMATTING)
    end
    private_class_method :wrap
  end
end
|
data/lib/persian/url.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
# Persian module
|
4
|
+
module Persian
  # Persian Url class
  # Builds URL-friendly slugs from Persian text.
  class Url
    # Turn +text+ into a dash-separated slug.
    #
    # Steps, in order: remove brackets and harekats (via Text), remove slashes
    # and backslashes, replace signs with spaces (via Text), collapse and trim
    # whitespace, then replace each remaining space with a dash.
    def self.urlify(text)
      slug = Text.remove_brackets(text)
      slug = Text.remove_harekats(slug)
      # A character class removes both separators; an alternation with an
      # empty branch would match the empty string before reaching the backslash.
      slug = slug.gsub(%r{[/\\]}, '')
      slug = Text.remove_signs(slug, ' ')
      Text.remove_extra_spaces(slug).strip.gsub(/\s/, '-')
    end
  end
end
|
data/lib/persian/version.rb
CHANGED
data/lib/persian.rb
CHANGED
@@ -1,42 +1,19 @@
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
# lists
|
4
|
+
require 'persian/list/alphabet'
|
5
|
+
require 'persian/list/number'
|
6
|
+
require 'persian/list/character'
|
7
|
+
require 'persian/list/homonyms'
|
5
8
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
"5" => "۵",
|
18
|
-
"6" => "۶",
|
19
|
-
"7" => "۷",
|
20
|
-
"8" => "۸",
|
21
|
-
"9" => "۹",
|
22
|
-
|
23
|
-
#arabic numbers
|
24
|
-
"٠" => "۰",
|
25
|
-
"١" => "۱",
|
26
|
-
"٢" => "۲",
|
27
|
-
"٣" => "۳",
|
28
|
-
"٤" => "۴",
|
29
|
-
"٥" => "۵",
|
30
|
-
"٦" => "۶",
|
31
|
-
"٧" => "۷",
|
32
|
-
"٨" => "۸",
|
33
|
-
"٩" => "۹",
|
34
|
-
}
|
35
|
-
|
36
|
-
nums.each {|k, v|
|
37
|
-
num.gsub!(k, v)
|
38
|
-
}
|
39
|
-
|
40
|
-
return num
|
41
|
-
end
|
42
|
-
end
|
9
|
+
# classes
|
10
|
+
require 'persian/number'
|
11
|
+
require 'persian/text/text'
|
12
|
+
require 'persian/text/keyboard'
|
13
|
+
require 'persian/num_text'
|
14
|
+
require 'persian/date'
|
15
|
+
require 'persian/tokenizer'
|
16
|
+
require 'persian/counter'
|
17
|
+
require 'persian/unicode'
|
18
|
+
require 'persian/dynamic'
|
19
|
+
require 'persian/url'
|
data/persian.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
|
6
|
+
require 'persian/version'
|
7
|
+
|
8
|
+
# Gem metadata for the persian gem. File lists are taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name = 'persian'
  s.version = Persian::VERSION
  s.date = '2022-03-25'
  s.summary = 'Persian language for ruby.'
  s.description = 'A set of utilities for Persian language.'
  s.authors = ['Dariush Abbasi']
  s.email = 'poshtehani@gmail.com'
  s.files = `git ls-files`.split("\n")
  # Plain `spec/*` pathspec: a one-element brace group such as `{spec}` is not
  # expanded by the shell, so git would receive it literally and list nothing.
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.executables =
    `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ['lib']
  s.homepage =
    'http://github.com/dariubs/persian.rb'
  s.license = 'MIT'

  s.add_development_dependency 'rspec', '3.4'
end
|
data/readme.md
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
<p align="center">
|
2
|
+
<img src="https://upload.wikimedia.org/wikipedia/commons/a/a2/Farsi.svg"
|
3
|
+
height="130" alt="Persian ruby gem">
|
4
|
+
</p>
|
5
|
+
|
6
|
+
<a href="https://travis-ci.org/negah/persian">
|
7
|
+
<img src="https://travis-ci.org/negah/persian.svg?branch=master"
|
8
|
+
alt="Build Status">
|
9
|
+
</a>
|
10
|
+
|
11
|
+
<a href="https://rubygems.org/gems/persian">
|
12
|
+
<img src="https://img.shields.io/badge/gem-persian-orange.svg"
|
13
|
+
alt="Ruby Gems">
|
14
|
+
</a>
|
15
|
+
|
16
|
+
<a href="https://rubygems.org/gems/persian">
|
17
|
+
<img src="https://img.shields.io/gem/dv/persian/stable.svg?maxAge=2592000"
|
18
|
+
alt="Ruby Gems downloads">
|
19
|
+
</a>
|
20
|
+
|
21
|
+
<a href="https://codeclimate.com/github/negah/persian">
|
22
|
+
<img src="https://codeclimate.com/github/negah/persian/badges/gpa.svg"
|
23
|
+
alt="Code Climate">
|
24
|
+
</a>
|
25
|
+
|
26
|
+
<p align="center"><sup><strong> Ruby gem for working with Persian text. </strong></sup></p>
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
Install
|
31
|
+
-----
|
32
|
+
```shell
|
33
|
+
gem install persian
|
34
|
+
```
|
35
|
+
|
36
|
+
Usage
|
37
|
+
-----
|
38
|
+
```ruby
|
39
|
+
require 'persian'
|
40
|
+
```
|
41
|
+
|
42
|
+
Components
|
43
|
+
----------
|
44
|
+
incomplete.
|
45
|
+
|
46
|
+
License
|
47
|
+
-------
|
48
|
+
Released under the MIT License.
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for Persian::Counter: character, word and paragraph counting.
describe 'persian counter methods' do
  it 'should return a hash of characters with their number of occurrence' do
    text = 'من غلام قمرم غیر قمر هیچ مگو'
    expected = {
      'م' => 6,
      'ن' => 1,
      ' ' => 6,
      'غ' => 2,
      'ل' => 1,
      'ا' => 1,
      'ق' => 2,
      'ر' => 3,
      'ی' => 2,
      'ه' => 1,
      'چ' => 1,
      'گ' => 1,
      'و' => 1
    }

    expect(Persian::Counter.character(text)).to eq(expected)
    # With a second argument only that character's count is returned.
    expect(Persian::Counter.character(text, 'غ')).to eq(2)
  end

  it 'should return a hash of words as key and number of occurrence of word as value' do
    text = 'پرچم دوران هخامنشی به احتمال زیاد عقابی با بال های گشوده با قرص خورشیدی در پشت سر عقاب بوده است'
    expected = {
      'پرچم' => 1,
      'دوران' => 1,
      'هخامنشی' => 1,
      'به' => 1,
      'احتمال' => 1,
      'زیاد' => 1,
      'عقابی' => 1,
      'با' => 2,
      'بال' => 1,
      'های' => 1,
      'گشوده' => 1,
      'قرص' => 1,
      'خورشیدی' => 1,
      'در' => 1,
      'پشت' => 1,
      'سر' => 1,
      'عقاب' => 1,
      'بوده' => 1,
      'است' => 1
    }

    expect(Persian::Counter.word(text)).to eq(expected)
    # With a second argument only that word's count is returned.
    expect(Persian::Counter.word(text, 'با')).to eq(2)
  end

  it 'should return number of paragraphs' do
    # Joined explicitly so the fixture does not depend on source indentation:
    # an empty first line, two paragraphs and a trailing newline.
    text = [
      '',
      'یوهانس برامس در سال ۱۸۳۳ در شهر هامبورگ آلمان در خانوادهای فقیر به دنیا آمد. تحصیلات ابتدایی موسیقی را نزد پدرش که نوازنده کنترباس بود فرا گرفت.',
      'برامس با ویولونیستهای مشهوری چون رمنی و یواخیم آشنا شد و در طول این آشنایی بود که رمنی موسیقی محلی مجارستان را به برامس معرفی کرد و تحت تأثیر آن برامس رقصهای مجار خود را نوشت.',
      ''
    ].join("\n")

    expect(Persian::Counter.paragraph(text)).to eq(2)
  end

  it 'should count uniq characters' do
    text = 'دوستت دارم'

    expect(Persian::Counter.uniq_character(text)).to eq(8)
  end

  it 'should return length of text' do
    text = 'راهی بزن که آهی بر ساز آن توان زد'

    expect(Persian::Counter.character_counter(text)).to eq(33)
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# Specs for Persian::NumText.num_to_char: spelling numbers out in Persian.
describe 'persian number to character methods' do
  it 'should convert english numbers to spelled persian character' do
    spelled = Persian::NumText.num_to_char(1234)

    expect(spelled).to eq('یک هزار و دویست و سی و چهار')
  end

  it 'should convert Persian numbers to spelled persian number' do
    spelled = Persian::NumText.num_to_char('۲۰۴۸۲۰۴۸')

    expect(spelled).to eq('بیست میلیون و چهارصد و هشتاد و دو هزار و چهل و هشت')
  end
end
|