farsi_processor 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/farsi_processor.rb +23 -8
- data/lib/farsi_processor/version.rb +1 -1
- data/lib/{farsi_normalizer.rb → normalizer.rb} +4 -34
- data/lib/{farsi_stemmer.rb → stemmer.rb} +3 -33
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: dd54aed2bba8590ca05133cf27b13230d81827686e884509259ab3a97e4eae2b
|
4
|
+
data.tar.gz: 1f321d0f1062af86e72973025cd923509545271e980190cb53cd8bb7a377b178
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38b5af6c8d3403a0c5b4f4ab75393cfcb2c130ac8259f639f21daf85eca2b844126c06b2e9183b05c1ee0975b5a085f9f3d1517570b55ce40eef94466bde2de8
|
7
|
+
data.tar.gz: 964bce4fb3443c21065b6ab7e794d830fbb7be18501ef1646e710c246803630cf49c3f8edcdc72954bdf63da18ed410ed7c099ecad1d1b8d05c52fedab6a6612
|
data/lib/farsi_processor.rb
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
require 'farsi_processor/version'
|
2
|
-
require_relative 'farsi_normalizer'
|
3
|
-
require_relative 'farsi_stemmer'
|
2
|
+
require_relative 'normalizer'
|
3
|
+
require_relative 'stemmer'
|
4
4
|
|
5
5
|
class FarsiProcessor
|
6
|
+
include Normalizer
|
7
|
+
include Stemmer
|
8
|
+
|
6
9
|
def self.process(word, options = {})
|
7
10
|
new(word, options).process
|
8
11
|
end
|
@@ -15,11 +18,19 @@ class FarsiProcessor
|
|
15
18
|
new(word, options).stem
|
16
19
|
end
|
17
20
|
|
18
|
-
attr_reader :word, :options
|
21
|
+
attr_reader :word, :options, :excepts, :onlys
|
19
22
|
|
20
23
|
def initialize(word, options = {})
|
21
24
|
@word = word
|
22
25
|
@options = options
|
26
|
+
|
27
|
+
@onlys = []
|
28
|
+
@excepts = []
|
29
|
+
if options[:only]
|
30
|
+
@onlys = options[:only]
|
31
|
+
elsif options[:except]
|
32
|
+
@excepts = options[:except]
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
def process
|
@@ -27,11 +38,15 @@ class FarsiProcessor
|
|
27
38
|
stem
|
28
39
|
end
|
29
40
|
|
30
|
-
|
31
|
-
@word = FarsiNormalizer.process(word, options)
|
32
|
-
end
|
41
|
+
private
|
33
42
|
|
34
|
-
def stem
|
35
|
-
|
43
|
+
def filter_rules(group)
|
44
|
+
if excepts.any?
|
45
|
+
group.reject { |k, _v| excepts.include?(k) }
|
46
|
+
elsif onlys.any?
|
47
|
+
group.select { |k, _v| onlys.include?(k) }
|
48
|
+
else
|
49
|
+
group
|
50
|
+
end
|
36
51
|
end
|
37
52
|
end
|
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class FarsiNormalizer
|
1
|
+
module Normalizer
|
4
2
|
ARABIC_KAF = "\u0643".freeze # ك
|
5
3
|
FARSI_KEHEH = "\u06a9".freeze # ک
|
6
4
|
|
@@ -45,25 +43,7 @@ class FarsiNormalizer
|
|
45
43
|
SUKUN
|
46
44
|
].freeze
|
47
45
|
|
48
|
-
def self.process(word, options = {})
|
49
|
-
new(word, options).process
|
50
|
-
end
|
51
|
-
|
52
|
-
attr_reader :word, :excepts, :onlys
|
53
|
-
|
54
|
-
def initialize(word, options = {})
|
55
|
-
@word = word.dup
|
56
|
-
|
57
|
-
@onlys = []
|
58
|
-
@excepts = []
|
59
|
-
if options[:only]
|
60
|
-
@onlys = options[:only]
|
61
|
-
elsif options[:except]
|
62
|
-
@excepts = options[:except]
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def process
|
46
|
+
def normalize
|
67
47
|
map_charachters
|
68
48
|
remove_diacritics
|
69
49
|
word
|
@@ -71,27 +51,17 @@ class FarsiNormalizer
|
|
71
51
|
|
72
52
|
private
|
73
53
|
|
74
|
-
def filter_rules(group)
|
75
|
-
if excepts.any?
|
76
|
-
group.reject { |k, _v| excepts.include?(k) }
|
77
|
-
elsif onlys.any?
|
78
|
-
group.select { |k, _v| onlys.include?(k) }
|
79
|
-
else
|
80
|
-
group
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
54
|
def map_charachters
|
85
55
|
rules = filter_rules(CHARACTERS_MAPPINGS)
|
86
56
|
return if rules.empty?
|
87
57
|
|
88
|
-
word.gsub
|
58
|
+
@word = word.gsub(/[#{rules.keys.join}]/, rules)
|
89
59
|
end
|
90
60
|
|
91
61
|
def remove_diacritics
|
92
62
|
rules = filter_rules(DIACRITICS)
|
93
63
|
return if rules.empty?
|
94
64
|
|
95
|
-
word.gsub
|
65
|
+
@word = word.gsub(/[#{rules.join}]/, '')
|
96
66
|
end
|
97
67
|
end
|
@@ -1,6 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class FarsiStemmer
|
1
|
+
module Stemmer
|
4
2
|
ALEF = "\u0627".freeze # ا
|
5
3
|
YEH = "\u06cc".freeze # ی
|
6
4
|
HEH = "\u0647".freeze # ه
|
@@ -27,41 +25,13 @@ class FarsiStemmer
|
|
27
25
|
ALEF + YEH
|
28
26
|
] + PLURAL_FORMS
|
29
27
|
|
30
|
-
def self.process(word, options = {})
|
31
|
-
new(word, options).process
|
32
|
-
end
|
33
|
-
|
34
|
-
attr_reader :word, :excepts, :onlys
|
35
|
-
|
36
|
-
def initialize(word, options = {})
|
37
|
-
@word = word.dup
|
38
|
-
|
39
|
-
@onlys = []
|
40
|
-
@excepts = []
|
41
|
-
if options[:only]
|
42
|
-
@onlys = options[:only]
|
43
|
-
elsif options[:except]
|
44
|
-
@excepts = options[:except]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def process
|
28
|
+
def stem
|
49
29
|
stem_suffix
|
50
|
-
word.strip
|
30
|
+
@word = word.strip
|
51
31
|
end
|
52
32
|
|
53
33
|
private
|
54
34
|
|
55
|
-
def filter_rules(group)
|
56
|
-
if excepts.any?
|
57
|
-
group.reject { |k, _v| excepts.include?(k) }
|
58
|
-
elsif onlys.any?
|
59
|
-
group.select { |k, _v| onlys.include?(k) }
|
60
|
-
else
|
61
|
-
group
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
35
|
def stem_suffix
|
66
36
|
filter_rules(SUFFIXES).each do |suffix|
|
67
37
|
if word.end_with?(suffix)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: farsi_processor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mark jad
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -72,10 +72,10 @@ files:
|
|
72
72
|
- bin/console
|
73
73
|
- bin/setup
|
74
74
|
- farsi_processor.gemspec
|
75
|
-
- lib/farsi_normalizer.rb
|
76
75
|
- lib/farsi_processor.rb
|
77
76
|
- lib/farsi_processor/version.rb
|
78
|
-
- lib/farsi_stemmer.rb
|
77
|
+
- lib/normalizer.rb
|
78
|
+
- lib/stemmer.rb
|
79
79
|
homepage: https://github.com/mshka/farsi_processor
|
80
80
|
licenses:
|
81
81
|
- MIT
|
@@ -96,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
96
|
version: '0'
|
97
97
|
requirements: []
|
98
98
|
rubyforge_project:
|
99
|
-
rubygems_version: 2.
|
99
|
+
rubygems_version: 2.7.4
|
100
100
|
signing_key:
|
101
101
|
specification_version: 4
|
102
102
|
summary: farsi_processor is a Ruby gem to process (stem and normalize) persian/farsi
|