persian 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 19996fd038d9500710629966a4f1d8376fccf274
4
- data.tar.gz: b2dfff05e57dbb3cb3cb313a5c2431b90a688fb0
2
+ SHA256:
3
+ metadata.gz: ad07e72f8e952adecef3078d3132de48372e936682eaf67dfc77745d863d24d5
4
+ data.tar.gz: 618f5b540864a034b4fc5bae2865a1c6de8f0fff50fb12eb1c28e4a506062d06
5
5
  SHA512:
6
- metadata.gz: b3d326f07258d4fc731220fc411a58c50a74f128178bf39b9389ba87a448e032986ac44996d1e1425bdd809589ec9aee9ddb6949b64ddd8cb8a7e965d84aa7eb
7
- data.tar.gz: 470411b5ae7d07d45f7a685a79c237c78e3294a59dfe785f539ef9fdba5ce5e4e784a0546c47dae8e3bf4d6e2773e3d38f64990d6ace98761a82f66f4a5ca917
6
+ metadata.gz: 5dd769632abf8da06746802aedbcfc83a3bb49edfec68364f068c476454469b38ba8e91a0ea721026202d025714a88ee4a969d76ea928f88b0a48af134cb6f71
7
+ data.tar.gz: ad6bea9eee317516acfe91241137896f474877531ef34c4b69fe1e297b7ca346757e88b9baf7e67dc16b01b9ddc86994900ae1b442ac2325cf59cc909c955c9b
data/.rubocop.yml CHANGED
@@ -27,3 +27,6 @@ Style/AsciiComments:
27
27
 
28
28
  Style/MethodMissing:
29
29
  Enabled: false
30
+
31
+ Style/ConstantName:
32
+ Enabled: false
@@ -0,0 +1,59 @@
1
+ module Persian
2
+ # Homonyms of persian
3
+ module Homonyms
4
+ include Alphabet
5
+
6
+ T = [
7
+ TE,
8
+ TA
9
+ ].freeze
10
+
11
+ S = [
12
+ THE,
13
+ SIN,
14
+ SAD
15
+ ].freeze
16
+
17
+ H = [
18
+ HE_JIMI,
19
+ HE_DOCHESHM
20
+ ].freeze
21
+
22
+ Z = [
23
+ ZAL,
24
+ ZE,
25
+ ZA,
26
+ ZAD
27
+ ].freeze
28
+
29
+ GH = [
30
+ GHEIN,
31
+ QAF
32
+ ].freeze
33
+
34
+ # List of all Homonyms classified in a hash
35
+ ALL = {
36
+ T: T,
37
+ S: S,
38
+ H: H,
39
+ Z: Z,
40
+ GH: GH
41
+ }.freeze
42
+
43
+ # List of all Homonyms bulk in array
44
+ ALL_a = [
45
+ T, S, H, Z, GH
46
+ ].flatten.freeze
47
+
48
+ # Hash reverse list of Homonyms
49
+ temp = {}
50
+
51
+ ALL.each do |key, value|
52
+ value.each do |i|
53
+ temp[i.to_s] = key
54
+ end
55
+ end
56
+
57
+ ALL_r = temp.freeze
58
+ end
59
+ end
@@ -20,29 +20,35 @@ module Persian
20
20
 
21
21
  # Remove Arabic harecats from text
22
22
  def self.remove_harekats(text)
23
- HAREKATS.each { |v| text.gsub!(v, '') }
23
+ HAREKATS.each { |v| text = text.gsub(v, '') }
24
24
  text
25
25
  end
26
26
 
27
27
  # Remove All barckets
28
28
  def self.remove_brackets(text)
29
- BRACKETS.each { |v| text.gsub!(v, '') }
29
+ BRACKETS.each { |v| text = text.gsub(v, '') }
30
30
  text
31
31
  end
32
32
 
33
33
  # Remove Persian signs
34
34
  def self.remove_signs(text, with = '')
35
- SIGNS.each { |v| text.gsub!(v, with) }
35
+ return '' if text.nil?
36
+ SIGNS.each { |v| text = text.gsub(v, with) }
37
+ text
38
+ end
39
+
40
+ def self.replace_zwnj_with_space(text)
41
+ text = text.gsub(/(‌)/, ' ')
36
42
  text
37
43
  end
38
44
 
39
45
  # Replace general brackets with one type brackets
40
46
  # Default: 0xAB & 0xBB
41
47
  def self.general_brackets(text, left = '«', right = '»')
42
- text.gsub!(/"(.*?)"/, left + '\1' + right)
43
- text.gsub!(/\[(.*?)\]/, left + '\1' + right)
44
- text.gsub!(/\{(.*?)\}/, left + '\1' + right)
45
- text.gsub!(/\((.*?)\)/, left + '\1' + right)
48
+ text = text.gsub(/"(.*?)"/, left + '\1' + right)
49
+ text = text.gsub(/\[(.*?)\]/, left + '\1' + right)
50
+ text = text.gsub(/\{(.*?)\}/, left + '\1' + right)
51
+ text = text.gsub(/\((.*?)\)/, left + '\1' + right)
46
52
  text
47
53
  end
48
54
 
@@ -193,5 +199,16 @@ module Persian
193
199
  text.gsub!(/(\.)(\S)/, '\1 \2')
194
200
  text
195
201
  end
202
+
203
+ def self.squeeze(text)
204
+ text.squeeze
205
+ end
206
+
207
+ # Remove specific character from end of text
208
+ # EXample: remove_postfix('پسره','ه')
209
+ def self.remove_postfix(text, postfix)
210
+ text.chomp!(postfix)
211
+ text
212
+ end
196
213
  end
197
214
  end
@@ -32,6 +32,20 @@ module Persian
32
32
  tokens.flatten
33
33
  end
34
34
 
35
+ def self.tokenize_more(text, num)
36
+ list = tokenize(text)
37
+ tokens = []
38
+ 0.upto list.size - num do |i|
39
+ token = ''
40
+ 0.upto num - 1 do |j|
41
+ token += list[i + j] + ' '
42
+ end
43
+ tokens.push token.strip
44
+ end
45
+
46
+ tokens
47
+ end
48
+
35
49
  # Split paragraphs
36
50
  # Return an array of paragraphs
37
51
  def self.split_paragraphs(text)
data/lib/persian/url.rb CHANGED
@@ -10,7 +10,7 @@ module Persian
10
10
  # remove harekats
11
11
  text = Text.remove_harekats(text)
12
12
  # remove slash and backslash
13
- text.gsub!(%r{(\/||\\)}, '')
13
+ text = text.gsub(%r{(\/||\\)}, '')
14
14
  # remove signs
15
15
  text = Text.remove_signs(text, ' ')
16
16
  # Remove extra spaces
@@ -18,7 +18,7 @@ module Persian
18
18
  # trim spaces from start and end of text
19
19
  text = text.strip
20
20
  # replace space with dash
21
- text.gsub!(/\s/, '-')
21
+ text = text.gsub(/\s/, '-')
22
22
  text
23
23
  end
24
24
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # Persian module
4
4
  module Persian
5
- VERSION = '0.2.1'.freeze
5
+ VERSION = '0.2.2'.freeze
6
6
  end
data/lib/persian.rb CHANGED
@@ -4,6 +4,7 @@
4
4
  require 'persian/list/alphabet'
5
5
  require 'persian/list/number'
6
6
  require 'persian/list/character'
7
+ require 'persian/list/homonyms'
7
8
 
8
9
  # classes
9
10
  require 'persian/number'
data/persian.gemspec CHANGED
@@ -8,7 +8,7 @@ require 'persian/version'
8
8
  Gem::Specification.new do |s|
9
9
  s.name = 'persian'
10
10
  s.version = Persian::VERSION
11
- s.date = '2016-11-16'
11
+ s.date = '2022-03-25'
12
12
  s.summary = 'Persian language for ruby.'
13
13
  s.description = 'A set of utilities for Persian language.'
14
14
  s.authors = ['Dariush Abbasi']
@@ -19,7 +19,7 @@ Gem::Specification.new do |s|
19
19
  `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
20
20
  s.require_paths = ['lib']
21
21
  s.homepage =
22
- 'http://github.com/negah/persian'
22
+ 'http://github.com/dariubs/persian.rb'
23
23
  s.license = 'MIT'
24
24
 
25
25
  s.add_development_dependency 'rspec', '3.4'
data/readme.md CHANGED
@@ -1,27 +1,28 @@
1
1
  <p align="center">
2
2
  <img src="https://upload.wikimedia.org/wikipedia/commons/a/a2/Farsi.svg"
3
- height="130" alt="Persian">
4
- </p>
5
- <p align="center">
6
- <a href="https://travis-ci.org/negah/persian">
7
- <img src="https://travis-ci.org/negah/persian.svg?branch=master"
8
- alt="Build Status">
9
- </a>
10
- <a href="https://rubygems.org/gems/persian">
11
- <img src="https://img.shields.io/badge/gem-persian-orange.svg"
12
- alt="Ruby Gems">
13
- </a>
14
-
15
- <a href="https://rubygems.org/gems/persian">
16
- <img src="https://img.shields.io/gem/dv/persian/stable.svg?maxAge=2592000"
17
- alt="Ruby Gems downloads">
18
- </a>
19
-
20
- <a href="https://codeclimate.com/github/negah/persian">
21
- <img src="https://codeclimate.com/github/negah/persian/badges/gpa.svg"
22
- alt="Code Climate">
23
- </a>
3
+ height="130" alt="Persian ruby gem">
24
4
  </p>
5
+
6
+ <a href="https://travis-ci.org/negah/persian">
7
+ <img src="https://travis-ci.org/negah/persian.svg?branch=master"
8
+ alt="Build Status">
9
+ </a>
10
+
11
+ <a href="https://rubygems.org/gems/persian">
12
+ <img src="https://img.shields.io/badge/gem-persian-orange.svg"
13
+ alt="Ruby Gems">
14
+ </a>
15
+
16
+ <a href="https://rubygems.org/gems/persian">
17
+ <img src="https://img.shields.io/gem/dv/persian/stable.svg?maxAge=2592000"
18
+ alt="Ruby Gems downloads">
19
+ </a>
20
+
21
+ <a href="https://codeclimate.com/github/negah/persian">
22
+ <img src="https://codeclimate.com/github/negah/persian/badges/gpa.svg"
23
+ alt="Code Climate">
24
+ </a>
25
+
25
26
  <p align="center"><sup><strong> Ruby gem for working with Persian text. </strong></sup></p>
26
27
 
27
28
 
data/spec/text_spec.rb CHANGED
@@ -33,6 +33,13 @@ describe 'persian character methods' do
33
33
  expect(Persian::Text.remove_signs(before)).to eq(after)
34
34
  end
35
35
 
36
+ it 'should replace all zwnjs with space ' do
37
+ before = 'من‌در‌نیم‌فاصله‌ات‌اسیرم'
38
+ after = 'من در نیم فاصله ات اسیرم'
39
+
40
+ expect(Persian::Text.replace_zwnj_with_space(before)).to eq(after)
41
+ end
42
+
36
43
  it 'should replace [ & ], { & }, ( & ), " & " with « & »' do
37
44
  before_first = 'اگر اراده ای نباشد عشقی نیست. "گاندی"'
38
45
  after_first = 'اگر اراده ای نباشد عشقی نیست. «گاندی»'
@@ -233,4 +240,19 @@ describe 'persian character methods' do
233
240
  after = 'سلام. اسپیس کو؟'
234
241
  expect(Persian::Text.space_after_dot(text)).to eq(after)
235
242
  end
243
+
244
+ it 'should remove all repeated characters from text' do
245
+ text = 'سلااااااام.چی میکنییی؟؟؟؟؟'
246
+ after = 'سلام.چی میکنی؟'
247
+
248
+ expect(Persian::Text.squeeze(text)).to eq(after)
249
+ end
250
+
251
+ it 'should remove text postfix' do
252
+ text = 'پسره'
253
+ postfix = 'ه'
254
+ result = 'پسر'
255
+
256
+ expect(Persian::Text.remove_postfix(text, postfix)).to eq(result)
257
+ end
236
258
  end
@@ -10,6 +10,14 @@ describe 'persian tokenizers' do
10
10
  expect(Persian::Tokenizer.tokenize(before)).to eq(after)
11
11
  end
12
12
 
13
+ it 'should tokenize as a serie' do
14
+ text = 'سلام من به تو یار قدیمی'
15
+ parts = 3
16
+ result = ['سلام من به', 'من به تو', 'به تو یار', 'تو یار قدیمی']
17
+
18
+ expect(Persian::Tokenizer.tokenize_more(text, parts)).to eq(result)
19
+ end
20
+
13
21
  it 'should split paragraphs' do
14
22
  text = "
15
23
  یوهانس برامس در سال ۱۸۳۳ در شهر هامبورگ آلمان در خانواده‌ای فقیر به دنیا آمد. تحصیلات ابتدایی موسیقی را نزد پدرش که نوازنده کنترباس بود فرا گرفت.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: persian
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dariush Abbasi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-16 00:00:00.000000000 Z
11
+ date: 2022-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -43,6 +43,7 @@ files:
43
43
  - lib/persian/dynamic.rb
44
44
  - lib/persian/list/alphabet.rb
45
45
  - lib/persian/list/character.rb
46
+ - lib/persian/list/homonyms.rb
46
47
  - lib/persian/list/number.rb
47
48
  - lib/persian/num_text.rb
48
49
  - lib/persian/number.rb
@@ -63,7 +64,7 @@ files:
63
64
  - spec/tokenizer_spec.rb
64
65
  - spec/unicode_spec.rb
65
66
  - spec/url_spec.rb
66
- homepage: http://github.com/negah/persian
67
+ homepage: http://github.com/dariubs/persian.rb
67
68
  licenses:
68
69
  - MIT
69
70
  metadata: {}
@@ -82,8 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
83
  - !ruby/object:Gem::Version
83
84
  version: '0'
84
85
  requirements: []
85
- rubyforge_project:
86
- rubygems_version: 2.5.1
86
+ rubygems_version: 3.2.5
87
87
  signing_key:
88
88
  specification_version: 4
89
89
  summary: Persian language for ruby.