persian 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 19996fd038d9500710629966a4f1d8376fccf274
4
- data.tar.gz: b2dfff05e57dbb3cb3cb313a5c2431b90a688fb0
2
+ SHA256:
3
+ metadata.gz: ad07e72f8e952adecef3078d3132de48372e936682eaf67dfc77745d863d24d5
4
+ data.tar.gz: 618f5b540864a034b4fc5bae2865a1c6de8f0fff50fb12eb1c28e4a506062d06
5
5
  SHA512:
6
- metadata.gz: b3d326f07258d4fc731220fc411a58c50a74f128178bf39b9389ba87a448e032986ac44996d1e1425bdd809589ec9aee9ddb6949b64ddd8cb8a7e965d84aa7eb
7
- data.tar.gz: 470411b5ae7d07d45f7a685a79c237c78e3294a59dfe785f539ef9fdba5ce5e4e784a0546c47dae8e3bf4d6e2773e3d38f64990d6ace98761a82f66f4a5ca917
6
+ metadata.gz: 5dd769632abf8da06746802aedbcfc83a3bb49edfec68364f068c476454469b38ba8e91a0ea721026202d025714a88ee4a969d76ea928f88b0a48af134cb6f71
7
+ data.tar.gz: ad6bea9eee317516acfe91241137896f474877531ef34c4b69fe1e297b7ca346757e88b9baf7e67dc16b01b9ddc86994900ae1b442ac2325cf59cc909c955c9b
data/.rubocop.yml CHANGED
@@ -27,3 +27,6 @@ Style/AsciiComments:
27
27
 
28
28
  Style/MethodMissing:
29
29
  Enabled: false
30
+
31
+ Style/ConstantName:
32
+ Enabled: false
@@ -0,0 +1,59 @@
1
+ module Persian
2
+ # Homonyms of Persian
3
+ module Homonyms
4
+ include Alphabet
5
+
6
+ T = [
7
+ TE,
8
+ TA
9
+ ].freeze
10
+
11
+ S = [
12
+ THE,
13
+ SIN,
14
+ SAD
15
+ ].freeze
16
+
17
+ H = [
18
+ HE_JIMI,
19
+ HE_DOCHESHM
20
+ ].freeze
21
+
22
+ Z = [
23
+ ZAL,
24
+ ZE,
25
+ ZA,
26
+ ZAD
27
+ ].freeze
28
+
29
+ GH = [
30
+ GHEIN,
31
+ QAF
32
+ ].freeze
33
+
34
+ # List of all Homonyms classified in a hash
35
+ ALL = {
36
+ T: T,
37
+ S: S,
38
+ H: H,
39
+ Z: Z,
40
+ GH: GH
41
+ }.freeze
42
+
43
+ # Flat array of all homonym letters
44
+ ALL_a = [
45
+ T, S, H, Z, GH
46
+ ].flatten.freeze
47
+
48
+ # Reverse lookup hash: letter (as string) => homonym group key
49
+ temp = {}
50
+
51
+ ALL.each do |key, value|
52
+ value.each do |i|
53
+ temp[i.to_s] = key
54
+ end
55
+ end
56
+
57
+ ALL_r = temp.freeze
58
+ end
59
+ end
@@ -20,29 +20,35 @@ module Persian
20
20
 
21
21
  # Remove Arabic harekats from text
22
22
  def self.remove_harekats(text)
23
- HAREKATS.each { |v| text.gsub!(v, '') }
23
+ HAREKATS.each { |v| text = text.gsub(v, '') }
24
24
  text
25
25
  end
26
26
 
27
27
  # Remove all brackets
28
28
  def self.remove_brackets(text)
29
- BRACKETS.each { |v| text.gsub!(v, '') }
29
+ BRACKETS.each { |v| text = text.gsub(v, '') }
30
30
  text
31
31
  end
32
32
 
33
33
  # Remove Persian signs
34
34
  def self.remove_signs(text, with = '')
35
- SIGNS.each { |v| text.gsub!(v, with) }
35
+ return '' if text.nil?
36
+ SIGNS.each { |v| text = text.gsub(v, with) }
37
+ text
38
+ end
39
+
40
+ def self.replace_zwnj_with_space(text)
41
+ text = text.gsub(/(‌)/, ' ')
36
42
  text
37
43
  end
38
44
 
39
45
  # Replace general brackets with one type brackets
40
46
  # Default: 0xAB & 0xBB
41
47
  def self.general_brackets(text, left = '«', right = '»')
42
- text.gsub!(/"(.*?)"/, left + '\1' + right)
43
- text.gsub!(/\[(.*?)\]/, left + '\1' + right)
44
- text.gsub!(/\{(.*?)\}/, left + '\1' + right)
45
- text.gsub!(/\((.*?)\)/, left + '\1' + right)
48
+ text = text.gsub(/"(.*?)"/, left + '\1' + right)
49
+ text = text.gsub(/\[(.*?)\]/, left + '\1' + right)
50
+ text = text.gsub(/\{(.*?)\}/, left + '\1' + right)
51
+ text = text.gsub(/\((.*?)\)/, left + '\1' + right)
46
52
  text
47
53
  end
48
54
 
@@ -193,5 +199,16 @@ module Persian
193
199
  text.gsub!(/(\.)(\S)/, '\1 \2')
194
200
  text
195
201
  end
202
+
203
+ def self.squeeze(text)
204
+ text.squeeze
205
+ end
206
+
207
+ # Remove specific character from end of text
208
+ # Example: remove_postfix('پسره','ه')
209
+ def self.remove_postfix(text, postfix)
210
+ text.chomp!(postfix)
211
+ text
212
+ end
196
213
  end
197
214
  end
@@ -32,6 +32,20 @@ module Persian
32
32
  tokens.flatten
33
33
  end
34
34
 
35
+ def self.tokenize_more(text, num)
36
+ list = tokenize(text)
37
+ tokens = []
38
+ 0.upto list.size - num do |i|
39
+ token = ''
40
+ 0.upto num - 1 do |j|
41
+ token += list[i + j] + ' '
42
+ end
43
+ tokens.push token.strip
44
+ end
45
+
46
+ tokens
47
+ end
48
+
35
49
  # Split paragraphs
36
50
  # Return an array of paragraphs
37
51
  def self.split_paragraphs(text)
data/lib/persian/url.rb CHANGED
@@ -10,7 +10,7 @@ module Persian
10
10
  # remove harekats
11
11
  text = Text.remove_harekats(text)
12
12
  # remove slash and backslash
13
- text.gsub!(%r{(\/||\\)}, '')
13
+ text = text.gsub(%r{(\/||\\)}, '')
14
14
  # remove signs
15
15
  text = Text.remove_signs(text, ' ')
16
16
  # Remove extra spaces
@@ -18,7 +18,7 @@ module Persian
18
18
  # trim spaces from start and end of text
19
19
  text = text.strip
20
20
  # replace space with dash
21
- text.gsub!(/\s/, '-')
21
+ text = text.gsub(/\s/, '-')
22
22
  text
23
23
  end
24
24
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Persian module
4
4
  module Persian
5
- VERSION = '0.2.1'.freeze
5
+ VERSION = '0.2.2'.freeze
6
6
  end
data/lib/persian.rb CHANGED
@@ -4,6 +4,7 @@
4
4
  require 'persian/list/alphabet'
5
5
  require 'persian/list/number'
6
6
  require 'persian/list/character'
7
+ require 'persian/list/homonyms'
7
8
 
8
9
  # classes
9
10
  require 'persian/number'
data/persian.gemspec CHANGED
@@ -8,7 +8,7 @@ require 'persian/version'
8
8
  Gem::Specification.new do |s|
9
9
  s.name = 'persian'
10
10
  s.version = Persian::VERSION
11
- s.date = '2016-11-16'
11
+ s.date = '2022-03-25'
12
12
  s.summary = 'Persian language for ruby.'
13
13
  s.description = 'A set of utilities for Persian language.'
14
14
  s.authors = ['Dariush Abbasi']
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
20
20
  s.require_paths = ['lib']
21
21
  s.homepage =
22
- 'http://github.com/negah/persian'
22
+ 'http://github.com/dariubs/persian.rb'
23
23
  s.license = 'MIT'
24
24
 
25
25
  s.add_development_dependency 'rspec', '3.4'
data/readme.md CHANGED
@@ -1,27 +1,28 @@
1
1
  <p align="center">
2
2
  <img src="https://upload.wikimedia.org/wikipedia/commons/a/a2/Farsi.svg"
3
- height="130" alt="Persian">
4
- </p>
5
- <p align="center">
6
- <a href="https://travis-ci.org/negah/persian">
7
- <img src="https://travis-ci.org/negah/persian.svg?branch=master"
8
- alt="Build Status">
9
- </a>
10
- <a href="https://rubygems.org/gems/persian">
11
- <img src="https://img.shields.io/badge/gem-persian-orange.svg"
12
- alt="Ruby Gems">
13
- </a>
14
-
15
- <a href="https://rubygems.org/gems/persian">
16
- <img src="https://img.shields.io/gem/dv/persian/stable.svg?maxAge=2592000"
17
- alt="Ruby Gems downloads">
18
- </a>
19
-
20
- <a href="https://codeclimate.com/github/negah/persian">
21
- <img src="https://codeclimate.com/github/negah/persian/badges/gpa.svg"
22
- alt="Code Climate">
23
- </a>
3
+ height="130" alt="Persian ruby gem">
24
4
  </p>
5
+
6
+ <a href="https://travis-ci.org/negah/persian">
7
+ <img src="https://travis-ci.org/negah/persian.svg?branch=master"
8
+ alt="Build Status">
9
+ </a>
10
+
11
+ <a href="https://rubygems.org/gems/persian">
12
+ <img src="https://img.shields.io/badge/gem-persian-orange.svg"
13
+ alt="Ruby Gems">
14
+ </a>
15
+
16
+ <a href="https://rubygems.org/gems/persian">
17
+ <img src="https://img.shields.io/gem/dv/persian/stable.svg?maxAge=2592000"
18
+ alt="Ruby Gems downloads">
19
+ </a>
20
+
21
+ <a href="https://codeclimate.com/github/negah/persian">
22
+ <img src="https://codeclimate.com/github/negah/persian/badges/gpa.svg"
23
+ alt="Code Climate">
24
+ </a>
25
+
25
26
  <p align="center"><sup><strong> Ruby gem for working with Persian text. </strong></sup></p>
26
27
 
27
28
 
data/spec/text_spec.rb CHANGED
@@ -33,6 +33,13 @@ describe 'persian character methods' do
33
33
  expect(Persian::Text.remove_signs(before)).to eq(after)
34
34
  end
35
35
 
36
+ it 'should replace all zwnjs with space ' do
37
+ before = 'من‌در‌نیم‌فاصله‌ات‌اسیرم'
38
+ after = 'من در نیم فاصله ات اسیرم'
39
+
40
+ expect(Persian::Text.replace_zwnj_with_space(before)).to eq(after)
41
+ end
42
+
36
43
  it 'should replace [ & ], { & }, ( & ), " & " with « & »' do
37
44
  before_first = 'اگر اراده ای نباشد عشقی نیست. "گاندی"'
38
45
  after_first = 'اگر اراده ای نباشد عشقی نیست. «گاندی»'
@@ -233,4 +240,19 @@ describe 'persian character methods' do
233
240
  after = 'سلام. اسپیس کو؟'
234
241
  expect(Persian::Text.space_after_dot(text)).to eq(after)
235
242
  end
243
+
244
+ it 'should remove all repeated characters from text' do
245
+ text = 'سلااااااام.چی میکنییی؟؟؟؟؟'
246
+ after = 'سلام.چی میکنی؟'
247
+
248
+ expect(Persian::Text.squeeze(text)).to eq(after)
249
+ end
250
+
251
+ it 'should remove text postfix' do
252
+ text = 'پسره'
253
+ postfix = 'ه'
254
+ result = 'پسر'
255
+
256
+ expect(Persian::Text.remove_postfix(text, postfix)).to eq(result)
257
+ end
236
258
  end
@@ -10,6 +10,14 @@ describe 'persian tokenizers' do
10
10
  expect(Persian::Tokenizer.tokenize(before)).to eq(after)
11
11
  end
12
12
 
13
+ it 'should tokenize as a serie' do
14
+ text = 'سلام من به تو یار قدیمی'
15
+ parts = 3
16
+ result = ['سلام من به', 'من به تو', 'به تو یار', 'تو یار قدیمی']
17
+
18
+ expect(Persian::Tokenizer.tokenize_more(text, parts)).to eq(result)
19
+ end
20
+
13
21
  it 'should split paragraphs' do
14
22
  text = "
15
23
  یوهانس برامس در سال ۱۸۳۳ در شهر هامبورگ آلمان در خانواده‌ای فقیر به دنیا آمد. تحصیلات ابتدایی موسیقی را نزد پدرش که نوازنده کنترباس بود فرا گرفت.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: persian
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dariush Abbasi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-16 00:00:00.000000000 Z
11
+ date: 2022-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -43,6 +43,7 @@ files:
43
43
  - lib/persian/dynamic.rb
44
44
  - lib/persian/list/alphabet.rb
45
45
  - lib/persian/list/character.rb
46
+ - lib/persian/list/homonyms.rb
46
47
  - lib/persian/list/number.rb
47
48
  - lib/persian/num_text.rb
48
49
  - lib/persian/number.rb
@@ -63,7 +64,7 @@ files:
63
64
  - spec/tokenizer_spec.rb
64
65
  - spec/unicode_spec.rb
65
66
  - spec/url_spec.rb
66
- homepage: http://github.com/negah/persian
67
+ homepage: http://github.com/dariubs/persian.rb
67
68
  licenses:
68
69
  - MIT
69
70
  metadata: {}
@@ -82,8 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
83
  - !ruby/object:Gem::Version
83
84
  version: '0'
84
85
  requirements: []
85
- rubyforge_project:
86
- rubygems_version: 2.5.1
86
+ rubygems_version: 3.2.5
87
87
  signing_key:
88
88
  specification_version: 4
89
89
  summary: Persian language for ruby.