farsi_processor 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 14caf0cd116b2df5f3c3dec3266733e2356d8a3b
4
- data.tar.gz: 231eb73eda6fd8d035ab607f81128a1179eaf756
2
+ SHA256:
3
+ metadata.gz: dd54aed2bba8590ca05133cf27b13230d81827686e884509259ab3a97e4eae2b
4
+ data.tar.gz: 1f321d0f1062af86e72973025cd923509545271e980190cb53cd8bb7a377b178
5
5
  SHA512:
6
- metadata.gz: 5d50d1e956b6a582c37de9a62fa122f652a0ae2b036ef7303df142d4ae4317feb8961986b7f62c0e2603e682c487b3cfe9cd5fe6a5b936d6fe92bc108198a582
7
- data.tar.gz: a7dd822e3a43d8ea0c3855fed403aa7b3972185e0bfe4cd0b8e9ed7ba4c6d9b3a9849ce4cf2d3457da3a97275d9cfd02d2c6c6049b783ab6081b6f942f09c14a
6
+ metadata.gz: 38b5af6c8d3403a0c5b4f4ab75393cfcb2c130ac8259f639f21daf85eca2b844126c06b2e9183b05c1ee0975b5a085f9f3d1517570b55ce40eef94466bde2de8
7
+ data.tar.gz: 964bce4fb3443c21065b6ab7e794d830fbb7be18501ef1646e710c246803630cf49c3f8edcdc72954bdf63da18ed410ed7c099ecad1d1b8d05c52fedab6a6612
@@ -1,8 +1,11 @@
1
1
  require 'farsi_processor/version'
2
- require_relative 'farsi_normalizer'
3
- require_relative 'farsi_stemmer'
2
+ require_relative 'normalizer'
3
+ require_relative 'stemmer'
4
4
 
5
5
  class FarsiProcessor
6
+ include Normalizer
7
+ include Stemmer
8
+
6
9
  def self.process(word, options = {})
7
10
  new(word, options).process
8
11
  end
@@ -15,11 +18,19 @@ class FarsiProcessor
15
18
  new(word, options).stem
16
19
  end
17
20
 
18
- attr_reader :word, :options
21
+ attr_reader :word, :options, :excepts, :onlys
19
22
 
20
23
  def initialize(word, options = {})
21
24
  @word = word
22
25
  @options = options
26
+
27
+ @onlys = []
28
+ @excepts = []
29
+ if options[:only]
30
+ @onlys = options[:only]
31
+ elsif options[:except]
32
+ @excepts = options[:except]
33
+ end
23
34
  end
24
35
 
25
36
  def process
@@ -27,11 +38,15 @@ class FarsiProcessor
27
38
  stem
28
39
  end
29
40
 
30
- def normalize
31
- @word = FarsiNormalizer.process(word, options)
32
- end
41
+ private
33
42
 
34
- def stem
35
- @word = FarsiStemmer.process(word, options)
43
+ def filter_rules(group)
44
+ if excepts.any?
45
+ group.reject { |k, _v| excepts.include?(k) }
46
+ elsif onlys.any?
47
+ group.select { |k, _v| onlys.include?(k) }
48
+ else
49
+ group
50
+ end
36
51
  end
37
52
  end
@@ -1,3 +1,3 @@
1
1
  class FarsiProcessor
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
@@ -1,6 +1,4 @@
1
- require 'farsi_processor/version'
2
-
3
- class FarsiNormalizer
1
+ module Normalizer
4
2
  ARABIC_KAF = "\u0643".freeze # ك
5
3
  FARSI_KEHEH = "\u06a9".freeze # ک
6
4
 
@@ -45,25 +43,7 @@ class FarsiNormalizer
45
43
  SUKUN
46
44
  ].freeze
47
45
 
48
- def self.process(word, options = {})
49
- new(word, options).process
50
- end
51
-
52
- attr_reader :word, :excepts, :onlys
53
-
54
- def initialize(word, options = {})
55
- @word = word.dup
56
-
57
- @onlys = []
58
- @excepts = []
59
- if options[:only]
60
- @onlys = options[:only]
61
- elsif options[:except]
62
- @excepts = options[:except]
63
- end
64
- end
65
-
66
- def process
46
+ def normalize
67
47
  map_charachters
68
48
  remove_diacritics
69
49
  word
@@ -71,27 +51,17 @@ class FarsiNormalizer
71
51
 
72
52
  private
73
53
 
74
- def filter_rules(group)
75
- if excepts.any?
76
- group.reject { |k, _v| excepts.include?(k) }
77
- elsif onlys.any?
78
- group.select { |k, _v| onlys.include?(k) }
79
- else
80
- group
81
- end
82
- end
83
-
84
54
  def map_charachters
85
55
  rules = filter_rules(CHARACTERS_MAPPINGS)
86
56
  return if rules.empty?
87
57
 
88
- word.gsub!(/[#{rules.keys.join}]/, rules)
58
+ @word = word.gsub(/[#{rules.keys.join}]/, rules)
89
59
  end
90
60
 
91
61
  def remove_diacritics
92
62
  rules = filter_rules(DIACRITICS)
93
63
  return if rules.empty?
94
64
 
95
- word.gsub!(/[#{rules.join}]/, '')
65
+ @word = word.gsub(/[#{rules.join}]/, '')
96
66
  end
97
67
  end
@@ -1,6 +1,4 @@
1
- require 'farsi_processor/version'
2
-
3
- class FarsiStemmer
1
+ module Stemmer
4
2
  ALEF = "\u0627".freeze # ا
5
3
  YEH = "\u06cc".freeze # ی
6
4
  HEH = "\u0647".freeze # ه
@@ -27,41 +25,13 @@ class FarsiStemmer
27
25
  ALEF + YEH
28
26
  ] + PLURAL_FORMS
29
27
 
30
- def self.process(word, options = {})
31
- new(word, options).process
32
- end
33
-
34
- attr_reader :word, :excepts, :onlys
35
-
36
- def initialize(word, options = {})
37
- @word = word.dup
38
-
39
- @onlys = []
40
- @excepts = []
41
- if options[:only]
42
- @onlys = options[:only]
43
- elsif options[:except]
44
- @excepts = options[:except]
45
- end
46
- end
47
-
48
- def process
28
+ def stem
49
29
  stem_suffix
50
- word.strip
30
+ @word = word.strip
51
31
  end
52
32
 
53
33
  private
54
34
 
55
- def filter_rules(group)
56
- if excepts.any?
57
- group.reject { |k, _v| excepts.include?(k) }
58
- elsif onlys.any?
59
- group.select { |k, _v| onlys.include?(k) }
60
- else
61
- group
62
- end
63
- end
64
-
65
35
  def stem_suffix
66
36
  filter_rules(SUFFIXES).each do |suffix|
67
37
  if word.end_with?(suffix)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: farsi_processor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - mark jad
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-12-27 00:00:00.000000000 Z
11
+ date: 2018-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -72,10 +72,10 @@ files:
72
72
  - bin/console
73
73
  - bin/setup
74
74
  - farsi_processor.gemspec
75
- - lib/farsi_normalizer.rb
76
75
  - lib/farsi_processor.rb
77
76
  - lib/farsi_processor/version.rb
78
- - lib/farsi_stemmer.rb
77
+ - lib/normalizer.rb
78
+ - lib/stemmer.rb
79
79
  homepage: https://github.com/mshka/farsi_processor
80
80
  licenses:
81
81
  - MIT
@@ -96,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
96
96
  version: '0'
97
97
  requirements: []
98
98
  rubyforge_project:
99
- rubygems_version: 2.5.2
99
+ rubygems_version: 2.7.4
100
100
  signing_key:
101
101
  specification_version: 4
102
102
  summary: farsi_processor is a Ruby gem to process (stem and normalize) persian/farsi