indonesian_stemmer 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/indonesian_stemmer.rb +14 -5
- data/lib/indonesian_stemmer/version.rb +1 -1
- data/spec/{indonesian_stemmer/indonesian_stemmer_spec.rb → indonesian_stemmer_spec.rb} +11 -0
- data/spec/{indonesian_stemmer → lib/indonesian_stemmer}/morphological_utility_spec.rb +0 -0
- data/spec/{indonesian_stemmer → lib/indonesian_stemmer}/stemmer_utility_spec.rb +0 -0
- metadata +10 -10
data/lib/indonesian_stemmer.rb
CHANGED
@@ -10,14 +10,19 @@ module IndonesianStemmer
|
|
10
10
|
|
11
11
|
def stem(word, derivational_stemming = true)
|
12
12
|
@flags = 0
|
13
|
-
@number_of_syllables = total_syllables word
|
14
13
|
|
15
|
-
|
16
|
-
|
14
|
+
if word =~ /\s/
|
15
|
+
word.split(' ').map { |w| stem(w) }
|
16
|
+
else
|
17
|
+
@number_of_syllables = total_syllables word
|
17
18
|
|
18
|
-
|
19
|
+
remove_particle(word) if still_has_many_syllables?
|
20
|
+
remove_possessive_pronoun(word) if still_has_many_syllables?
|
19
21
|
|
20
|
-
|
22
|
+
stem_derivational(word) if derivational_stemming
|
23
|
+
|
24
|
+
word
|
25
|
+
end
|
21
26
|
end
|
22
27
|
|
23
28
|
|
@@ -46,6 +51,10 @@ end
|
|
46
51
|
|
47
52
|
class String
|
48
53
|
def stem
|
54
|
+
IndonesianStemmer.stem(self.dup)
|
55
|
+
end
|
56
|
+
|
57
|
+
def stem!
|
49
58
|
IndonesianStemmer.stem(self)
|
50
59
|
end
|
51
60
|
end
|
@@ -1,6 +1,17 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe IndonesianStemmer do
|
4
|
+
describe "Regarding number words passed" do
|
5
|
+
it 'one word should return the stemmed word' do
|
6
|
+
IndonesianStemmer.stem('bukukah').should be_a(String)
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
it 'many words should return an array of stemmed words' do
|
11
|
+
IndonesianStemmer.stem('bagaimanapun mencintai').should_not be_empty
|
12
|
+
IndonesianStemmer.stem('melihat menduga').should == ['lihat', 'duga']
|
13
|
+
end
|
14
|
+
end
|
4
15
|
describe "covering the inflectional particles" do
|
5
16
|
describe "'kah'" do
|
6
17
|
it { should_stem 'bukukah', 'buku' }
|
File without changes
|
File without changes
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indonesian_stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -64,9 +64,9 @@ files:
|
|
64
64
|
- lib/indonesian_stemmer/morphological_utility.rb
|
65
65
|
- lib/indonesian_stemmer/stemmer_utility.rb
|
66
66
|
- lib/indonesian_stemmer/version.rb
|
67
|
-
- spec/indonesian_stemmer/indonesian_stemmer_spec.rb
|
68
|
-
- spec/indonesian_stemmer/morphological_utility_spec.rb
|
69
|
-
- spec/indonesian_stemmer/stemmer_utility_spec.rb
|
67
|
+
- spec/indonesian_stemmer_spec.rb
|
68
|
+
- spec/lib/indonesian_stemmer/morphological_utility_spec.rb
|
69
|
+
- spec/lib/indonesian_stemmer/stemmer_utility_spec.rb
|
70
70
|
- spec/spec_helper.rb
|
71
71
|
homepage: https://github.com/apraditya/indonesian_stemmer
|
72
72
|
licenses: []
|
@@ -82,7 +82,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
segments:
|
84
84
|
- 0
|
85
|
-
hash:
|
85
|
+
hash: 2800268474079069831
|
86
86
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
87
|
none: false
|
88
88
|
requirements:
|
@@ -91,7 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
91
|
version: '0'
|
92
92
|
segments:
|
93
93
|
- 0
|
94
|
-
hash:
|
94
|
+
hash: 2800268474079069831
|
95
95
|
requirements: []
|
96
96
|
rubyforge_project:
|
97
97
|
rubygems_version: 1.8.25
|
@@ -99,7 +99,7 @@ signing_key:
|
|
99
99
|
specification_version: 3
|
100
100
|
summary: Porter Stemmer for Bahasa Indonesia.
|
101
101
|
test_files:
|
102
|
-
- spec/indonesian_stemmer/indonesian_stemmer_spec.rb
|
103
|
-
- spec/indonesian_stemmer/morphological_utility_spec.rb
|
104
|
-
- spec/indonesian_stemmer/stemmer_utility_spec.rb
|
102
|
+
- spec/indonesian_stemmer_spec.rb
|
103
|
+
- spec/lib/indonesian_stemmer/morphological_utility_spec.rb
|
104
|
+
- spec/lib/indonesian_stemmer/stemmer_utility_spec.rb
|
105
105
|
- spec/spec_helper.rb
|