turkish_stemmer 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,26 +0,0 @@
1
- # coding : utf-8
2
- require 'benchmark'
3
- require 'turkish_stemmer'
4
-
5
- Benchmark.bmbm(7) do |x|
6
-
7
- x.report('regex') do
8
- TurkishStemmer.class_eval do
9
- def self.turkish?(word)
10
- !! word.match(TurkishStemmer::ALPHABET)
11
- end
12
- end
13
-
14
- 100_000.times { TurkishStemmer.turkish?("aaa") }
15
- end
16
-
17
- x.report('loop') do
18
- TurkishStemmer.class_eval do
19
- def self.turkish?(word)
20
- !! word.chars.to_a.all? { |c| "abcçdefgğhıijklmnoöprsştuüvyz".include?(c) }
21
- end
22
- end
23
-
24
- 100_000.times { TurkishStemmer.turkish?("aaaa") }
25
- end
26
- end
@@ -1,10 +0,0 @@
1
- a:
2
- transitions:
3
- - suffix: :s1
4
- state: :b
5
-
6
- final_state: false
7
-
8
- b:
9
- transitions: []
10
- final_state: true
@@ -1,6 +0,0 @@
1
- s1:
2
- name: "-lU"
3
- regex: "lı|li|lu|lü"
4
- optional_letter: false
5
- check_harmony: true
6
-
@@ -1,121 +0,0 @@
1
- # The order of the transitions is crucial.
2
- a:
3
- transitions:
4
- # Transitions to state B
5
- - suffix: :s1
6
- state: :b
7
- - suffix: :s2
8
- state: :b
9
- - suffix: :s4
10
- state: :b
11
- - suffix: :s3
12
- state: :b
13
- # Transitions to state C
14
- - suffix: :s5
15
- state: :c
16
- # Transitions to state D
17
- - suffix: :s6
18
- state: :d
19
- - suffix: :s7
20
- state: :d
21
- - suffix: :s8
22
- state: :d
23
- - suffix: :s9
24
- state: :d
25
- # Transitions to state E
26
- - suffix: :s10
27
- state: :e
28
- # Transitions to state F
29
- - suffix: :s12
30
- state: :f
31
- - suffix: :s13
32
- state: :f
33
- - suffix: :s14
34
- state: :f
35
- - suffix: :s15
36
- state: :f
37
- # Transitions to state H
38
- - suffix: :s11
39
- state: :h
40
-
41
- final_state: false
42
-
43
- b:
44
- transitions:
45
- - suffix: :s14
46
- state: :f
47
-
48
- final_state: true
49
-
50
- c:
51
- transitions:
52
- - suffix: :s10
53
- state: :f
54
- - suffix: :s12
55
- state: :f
56
- - suffix: :s13
57
- state: :f
58
- - suffix: :s14
59
- state: :f
60
-
61
- final_state: true
62
-
63
- d:
64
- transitions:
65
- - suffix: :s12
66
- state: :f
67
- - suffix: :s13
68
- state: :f
69
-
70
- final_state: false
71
-
72
- e:
73
- transitions:
74
- # Transitions to state G
75
- - suffix: :s1
76
- state: :g
77
- - suffix: :s2
78
- state: :g
79
- - suffix: :s3
80
- state: :g
81
- - suffix: :s4
82
- state: :g
83
- - suffix: :s5
84
- state: :g
85
- # Transitions to state F
86
- - suffix: :s14
87
- state: :f
88
- final_state: true
89
-
90
- f:
91
- transitions: []
92
-
93
- final_state: true
94
-
95
-
96
- g:
97
- transitions:
98
- - suffix: :s14
99
- state: :f
100
-
101
- final_state: false
102
-
103
- h:
104
- transitions:
105
- # Transitions to state F
106
- - suffix: :s14
107
- state: :f
108
- # Transitions to state G
109
- - suffix: :s1
110
- state: :g
111
- - suffix: :s2
112
- state: :g
113
- - suffix: :s3
114
- state: :g
115
- - suffix: :s4
116
- state: :g
117
- - suffix: :s5
118
- state: :g
119
-
120
- final_state: false
121
-
@@ -1,90 +0,0 @@
1
- s1:
2
- name: "-(y)Um"
3
- regex: "ım|im|um|üm"
4
- optional_letter: "y"
5
- check_harmony: true
6
-
7
- s2:
8
- name: "-sUn"
9
- regex: "sın|sin|sun|sün"
10
- optional_letter: false
11
- check_harmony: true
12
-
13
- s3:
14
- name: "-(y)Uz"
15
- regex: "ız|iz|uz|üz"
16
- optional_letter: "y"
17
- check_harmony: true
18
-
19
- s4:
20
- name: "-sUnUz"
21
- regex: "sınız|siniz|sunuz|sünüz"
22
- optional_letter: false
23
- check_harmony: true
24
-
25
- s5:
26
- name: "-lAr"
27
- regex: "lar|ler"
28
- optional_letter: false
29
- check_harmony: true
30
-
31
- s6:
32
- name: "-m"
33
- regex: "m"
34
- optional_letter: false
35
- check_harmony: true
36
-
37
- s7:
38
- name: "-n"
39
- regex: "n"
40
- optional_letter: false
41
- check_harmony: true
42
-
43
- s8:
44
- name: "-k"
45
- regex: "k"
46
- optional_letter: false
47
- check_harmony: true
48
-
49
- s9:
50
- name: "-nUz"
51
- regex: "nız|niz|nuz|nüz"
52
- optional_letter: false
53
- check_harmony: true
54
-
55
- s10:
56
- name: "-DUr"
57
- regex: "tır|tir|tur|tür|dır|dir|dur|dür"
58
- optional_letter: false
59
- check_harmony: true
60
-
61
- s11:
62
- name: "-cAsInA"
63
- regex: "casına|çasına|cesine|çesine"
64
- optional_letter: false
65
- check_harmony: true
66
-
67
- s12:
68
- name: "-(y)DU"
69
- regex: "dı|di|du|dü|tı|ti|tu|tü"
70
- optional_letter: "y"
71
- check_harmony: true
72
-
73
- s13:
74
- name: "-(y)sA"
75
- regex: "sa|se"
76
- optional_letter: "y"
77
- check_harmony: true
78
-
79
- s14:
80
- name: "-(y)mUş"
81
- regex: "muş|miş|müş|mış"
82
- optional_letter: "y"
83
- check_harmony: true
84
-
85
- s15:
86
- name: "-(y)ken"
87
- regex: "ken"
88
- optional_letter: "y"
89
- check_harmony: true
90
-
@@ -1,177 +0,0 @@
1
- # The order of the transitions is crucial.
2
- a:
3
- transitions:
4
- - suffix: :s16
5
- state: :c
6
- - suffix: :s7
7
- state: :k
8
- - suffix: :s3
9
- state: :h
10
- - suffix: :s5
11
- state: :h
12
- - suffix: :s1
13
- state: :l
14
- - suffix: :s14
15
- state: :f
16
- - suffix: :s15
17
- state: :g
18
- - suffix: :s17
19
- state: :e
20
- - suffix: :s10
21
- state: :e
22
- - suffix: :s19
23
- state: :m
24
- - suffix: :s4
25
- state: :h
26
- - suffix: :s9
27
- state: :c
28
- - suffix: :s12
29
- state: :f
30
- - suffix: :s13
31
- state: :b
32
- - suffix: :s18
33
- state: :d
34
- - suffix: :s2
35
- state: :h
36
- - suffix: :s6
37
- state: :h
38
- - suffix: :s8
39
- state: :b
40
- - suffix: :s11
41
- state: :b
42
-
43
- final_state: true
44
-
45
- b:
46
- transitions:
47
- - suffix: :s3
48
- state: :h
49
- - suffix: :s5
50
- state: :h
51
- - suffix: :s1
52
- state: :l
53
- - suffix: :s4
54
- state: :h
55
- - suffix: :s2
56
- state: :h
57
-
58
- final_state: true
59
-
60
- c:
61
- transitions:
62
- # Transitions to state K
63
- - suffix: :s7
64
- state: :k
65
- # Transitions to state H
66
- - suffix: :s6
67
- state: :h
68
-
69
- final_state: false
70
-
71
- d:
72
- transitions:
73
- # Transitions to state F
74
- - suffix: :s14
75
- state: :f
76
- # Transitions to state E
77
- - suffix: :s10
78
- state: :e
79
- # Transitions to state B
80
- - suffix: :s13
81
- state: :b
82
-
83
- final_state: false
84
-
85
- e:
86
- transitions:
87
- - suffix: :s7
88
- state: :k
89
- - suffix: :s3
90
- state: :h
91
- - suffix: :s5
92
- state: :h
93
- - suffix: :s1
94
- state: :l
95
- - suffix: :s4
96
- state: :h
97
- - suffix: :s18
98
- state: :d
99
- - suffix: :s2
100
- state: :h
101
- - suffix: :s6
102
- state: :h
103
-
104
- final_state: true
105
-
106
- f:
107
- transitions:
108
- # Transitions to state K
109
- - suffix: :s7
110
- state: :k
111
- # Transitions to state D
112
- - suffix: :s18
113
- state: :d
114
- # Transitions to state H
115
- - suffix: :s6
116
- state: :h
117
-
118
- final_state: false
119
-
120
-
121
- g:
122
- transitions:
123
- - suffix: :s5
124
- state: :h
125
- - suffix: :s3
126
- state: :h
127
- - suffix: :s1
128
- state: :l
129
- - suffix: :s4
130
- state: :h
131
- - suffix: :s18
132
- state: :d
133
- - suffix: :s2
134
- state: :h
135
-
136
- final_state: true
137
-
138
- h:
139
- transitions:
140
- # Transitions to state L
141
- - suffix: :s1
142
- state: :l
143
-
144
- final_state: true
145
-
146
- k:
147
- transitions: []
148
-
149
- final_state: true
150
-
151
- l:
152
- transitions:
153
- # Transitions to state D
154
- - suffix: :s18
155
- state: :d
156
-
157
- final_state: true
158
-
159
- m:
160
- transitions:
161
- - suffix: :s7
162
- state: :k
163
- - suffix: :s3
164
- state: :h
165
- - suffix: :s5
166
- state: :h
167
- - suffix: :s1
168
- state: :l
169
- - suffix: :s4
170
- state: :h
171
- - suffix: :s2
172
- state: :h
173
- - suffix: :s6
174
- state: :h
175
-
176
- final_state: true
177
-
@@ -1,113 +0,0 @@
1
- s1:
2
- name: "-lAr"
3
- regex: "lar|ler"
4
- optional_letter: false
5
- check_harmony: true
6
-
7
- s2:
8
- name: "-(U)m"
9
- regex: "m"
10
- optional_letter: "ı|i|u|ü"
11
- check_harmony: true
12
-
13
- s3:
14
- name: "-(U)mUz"
15
- regex: "mız|miz|muz|müz"
16
- optional_letter: "ı|i|u|ü"
17
- check_harmony: true
18
-
19
- s4:
20
- name: "-Un"
21
- regex: "ın|in|un|ün"
22
- optional_letter: false
23
- check_harmony: true
24
-
25
- s5:
26
- name: "-(U)nUz"
27
- regex: "nız|niz|nuz|nüz"
28
- optional_letter: "ı|i|u|ü"
29
- check_harmony: true
30
-
31
- s6:
32
- name: "-(s)U"
33
- regex: "ı|i|u|ü"
34
- optional_letter: "s"
35
- check_harmony: true
36
-
37
- s7:
38
- name: "-lArI"
39
- regex: "ları|leri"
40
- optional_letter: false
41
- check_harmony: true
42
-
43
- s8:
44
- name: "-(y)U"
45
- regex: "ı|i|u|ü"
46
- optional_letter: "y"
47
- check_harmony: true
48
-
49
- s9:
50
- name: "-nU"
51
- regex: "nı|ni|nu|nü"
52
- optional_letter: false
53
- check_harmony: true
54
-
55
- s10:
56
- name: "-(n)Un"
57
- regex: "ın|in|un|ün"
58
- optional_letter: "n"
59
- check_harmony: true
60
-
61
- s11:
62
- name: "-(y)A"
63
- regex: "a|e"
64
- optional_letter: "y"
65
- check_harmony: true
66
-
67
- s12:
68
- name: "-nA"
69
- regex: "na|ne"
70
- optional_letter: false
71
- check_harmony: true
72
-
73
- s13:
74
- name: "-DA"
75
- regex: "da|de|ta|te"
76
- optional_letter: false
77
- check_harmony: true
78
-
79
- s14:
80
- name: "-nDA"
81
- regex: "nta|nte|nda|nde"
82
- optional_letter: false
83
- check_harmony: true
84
-
85
- s15:
86
- name: "-DAn"
87
- regex: "dan|tan|den|ten"
88
- optional_letter: false
89
- check_harmony: true
90
-
91
- s16:
92
- name: "-nDAn"
93
- regex: "ndan|ntan|nden|nten"
94
- optional_letter: false
95
- check_harmony: true
96
-
97
- s17:
98
- name: "-(y)lA"
99
- regex: "la|le"
100
- optional_letter: "y"
101
- check_harmony: true
102
-
103
- s18:
104
- name: "-ki"
105
- regex: "ki"
106
- optional_letter: false
107
- check_harmony: false
108
-
109
- s19:
110
- name: "-(n)cA"
111
- regex: "ca|ce"
112
- optional_letter: "n"
113
- check_harmony: true