indonesian_stemmer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rvmrc +48 -0
- data/Gemfile +21 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/indonesian_stemmer.gemspec +23 -0
- data/lib/indonesian_stemmer.rb +51 -0
- data/lib/indonesian_stemmer/morphological_utility.rb +183 -0
- data/lib/indonesian_stemmer/stemmer_utility.rb +27 -0
- data/lib/indonesian_stemmer/version.rb +3 -0
- data/spec/indonesian_stemmer/indonesian_stemmer_spec.rb +145 -0
- data/spec/indonesian_stemmer/morphological_utility_spec.rb +630 -0
- data/spec/indonesian_stemmer/stemmer_utility_spec.rb +59 -0
- data/spec/spec_helper.rb +47 -0
- metadata +105 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the IndonesianStemmer::StemmerUtility mixin.
#
# The module is mixed into a throwaway host object and its two predicates,
# +starts_with?+ and +ends_with?+, are called as (word, length, affix).
# The examples pin how the result depends on the length argument relative
# to the affix size: equal or larger is expected to match, smaller is not.
#
# NOTE(review): +be_true+ / +be_false+ are the RSpec 2 matcher names; RSpec 3
# renamed them to +be_truthy+ / +be_falsey+. They are kept as-is here so the
# suite behaves the same on the RSpec version it was written for.
describe IndonesianStemmer::StemmerUtility do
  before do
    # An anonymous class is used as the mixin host so that no named constant
    # is defined (and reopened) every time this hook runs.
    host_class = Class.new { include IndonesianStemmer::StemmerUtility }
    @an_object = host_class.new
    @word = 'asldkamsdo'
  end

  describe "the word #starts_with? prefix" do
    it 'that have different letters from the prefix should return false' do
      @an_object.starts_with?(@word, 3, 'ber').should be_false
    end

    describe "that have the same letters with the word's first letters" do
      before do
        # 'asld' is the first four letters of @word.
        @prefix = 'asld'
      end

      it "by exactly prefix length should be true" do
        @an_object.starts_with?(@word, @prefix.size, @prefix).should be_true
      end

      it "by more than prefix length should still be true" do
        @an_object.starts_with?(@word, @prefix.size+1, @prefix).should be_true
      end

      it "by less than prefix length should be false" do
        @an_object.starts_with?(@word, @prefix.size-1, @prefix).should be_false
      end
    end
  end

  describe "the word #ends_with? suffix" do
    it 'that have different letters from the suffix should return false' do
      @an_object.ends_with?(@word, 3, 'abc').should be_false
    end

    describe "that have the same letters with the word's last letters" do
      before do
        # 'amsdo' is the last five letters of @word.
        @suffix = 'amsdo'
      end

      it "by exactly suffix length should be true" do
        @an_object.ends_with?(@word, @suffix.size, @suffix).should be_true
      end

      it "by more than suffix length should still be true" do
        @an_object.ends_with?(@word, @suffix.size+1, @suffix).should be_true
      end

      it "by less than suffix length should be false" do
        @an_object.ends_with?(@word, @suffix.size-1, @suffix).should be_false
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'indonesian_stemmer'
|
2
|
+
|
3
|
+
# Asserts that stemming +word+ yields +expected_word+.
#
# Calls +stem+ on +word+ and compares the result with +==+ through RSpec's
# +should+ syntax, so a mismatch fails the current example.
#
# @param word receiver of +stem+ (presumably a String that the gem extends
#   with +stem+ — confirm against the library code)
# @param expected_word value the stemmed result must equal
def should_stem(word, expected_word)
  word.stem.should == expected_word
end
|
6
|
+
|
7
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+
# returns a value equal (+==+) to +transformed_word+.
#
# The call goes through +send+, which also reaches non-public methods.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method
# @param transformed_word expected return value
def should_transform(method_name, word, transformed_word)
  IndonesianStemmer.send(method_name, word).should == transformed_word
end
|
10
|
+
|
11
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+
# returns a value equal (+==+) to +word+, i.e. the word comes back unchanged.
#
# The call goes through +send+, which also reaches non-public methods.
#
# NOTE(review): the expected value is the very object that was passed in, so
# a method that mutated +word+ in place and returned it would still satisfy
# this check — confirm the methods under test do not mutate their argument.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method, and also the expected result
def should_not_transform(method_name, word)
  IndonesianStemmer.send(method_name, word).should == word
end
|
14
|
+
|
15
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+ changes
# the module's +@flags+ instance variable to the value of a named constant.
#
# The constant is resolved with +get_constant+ (using its default lookup
# target) and the assertion is delegated to
# +should_set_instance_variable_to+ with the variable name 'flags'.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method
# @param expected_constant [Symbol, String] name of the constant whose value
#   +@flags+ is expected to become
def should_set_flags_to(method_name, word, expected_constant)
  should_set_instance_variable_to(method_name,
                                  word,
                                  'flags',
                                  get_constant(expected_constant))
end
|
21
|
+
|
22
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+ leaves
# the module's +@flags+ instance variable unchanged.
#
# Delegates to +should_not_set_instance_variable+ with the name 'flags'.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method
def should_not_set_flags(method_name, word)
  should_not_set_instance_variable(method_name, word, 'flags')
end
|
25
|
+
|
26
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+ changes
# the module-level instance variable +@<variable_name>+ to +expected_value+.
#
# Built on RSpec's +change+ matcher, which compares the observed value before
# and after the +expect+ block runs; the variable therefore has to hold a
# different value beforehand for the expectation to pass.
#
# The matcher chain is written with explicit parentheses so the grouping
# (+change { ... }.to(value)+ as the single argument of +expect { ... }.to+)
# does not depend on command-call precedence.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method
# @param variable_name [String] instance variable name without the leading "@"
# @param expected_value value the variable must hold after the call
def should_set_instance_variable_to(method_name, word, variable_name, expected_value)
  expect { IndonesianStemmer.send(method_name, word) }.to(
    change { IndonesianStemmer.instance_variable_get("@#{variable_name}") }.to(expected_value)
  )
end
|
33
|
+
|
34
|
+
# Asserts that calling +method_name+ on IndonesianStemmer with +word+ leaves
# the module-level instance variable +@<variable_name>+ unchanged.
#
# Built on RSpec's +change+ matcher, negated with +to_not+. Parentheses make
# it explicit that the +change+ matcher is the argument of +to_not+.
#
# @param method_name [Symbol, String] method to invoke on IndonesianStemmer
# @param word argument handed to that method
# @param variable_name [String] instance variable name without the leading "@"
def should_not_set_instance_variable(method_name, word, variable_name)
  expect { IndonesianStemmer.send(method_name, word) }.to_not(
    change { IndonesianStemmer.instance_variable_get("@#{variable_name}") }
  )
end
|
40
|
+
|
41
|
+
# Looks up a constant by name.
#
# @param name [Symbol, String] constant name to resolve
# @param klass [Module] module or class to resolve it on; defaults to
#   IndonesianStemmer (the default is only evaluated when +klass+ is omitted)
# @return the constant's value
# @raise [NameError] when +klass+ has no such constant (raised by +const_get+)
def get_constant(name, klass = IndonesianStemmer)
  klass.const_get(name)
end
|
44
|
+
|
45
|
+
# Resets IndonesianStemmer's module-level +@flags+ instance variable to nil.
#
# Presumably called between examples so that flag expectations start from a
# known state — confirm against the specs that use it.
def unset_flags
  IndonesianStemmer.instance_variable_set("@flags", nil)
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: indonesian_stemmer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Adinda Praditya
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-03-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: Stems Indonesian words based on Porter Stemmer, with the algorithm presented
|
47
|
+
in "A Study of Stemming Effects on Information Retrieval in Bahasa Indonesia", Fadillah
|
48
|
+
Z Tala.
|
49
|
+
email:
|
50
|
+
- apraditya@gmail.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files: []
|
54
|
+
files:
|
55
|
+
- .gitignore
|
56
|
+
- .rvmrc
|
57
|
+
- Gemfile
|
58
|
+
- Guardfile
|
59
|
+
- LICENSE.txt
|
60
|
+
- README.md
|
61
|
+
- Rakefile
|
62
|
+
- indonesian_stemmer.gemspec
|
63
|
+
- lib/indonesian_stemmer.rb
|
64
|
+
- lib/indonesian_stemmer/morphological_utility.rb
|
65
|
+
- lib/indonesian_stemmer/stemmer_utility.rb
|
66
|
+
- lib/indonesian_stemmer/version.rb
|
67
|
+
- spec/indonesian_stemmer/indonesian_stemmer_spec.rb
|
68
|
+
- spec/indonesian_stemmer/morphological_utility_spec.rb
|
69
|
+
- spec/indonesian_stemmer/stemmer_utility_spec.rb
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
homepage: https://github.com/apraditya/indonesian_stemmer
|
72
|
+
licenses: []
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options: []
|
75
|
+
require_paths:
|
76
|
+
- lib
|
77
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
segments:
|
84
|
+
- 0
|
85
|
+
hash: 1838075541569491639
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
segments:
|
93
|
+
- 0
|
94
|
+
hash: 1838075541569491639
|
95
|
+
requirements: []
|
96
|
+
rubyforge_project:
|
97
|
+
rubygems_version: 1.8.25
|
98
|
+
signing_key:
|
99
|
+
specification_version: 3
|
100
|
+
summary: Porter Stemmer for Bahasa Indonesia.
|
101
|
+
test_files:
|
102
|
+
- spec/indonesian_stemmer/indonesian_stemmer_spec.rb
|
103
|
+
- spec/indonesian_stemmer/morphological_utility_spec.rb
|
104
|
+
- spec/indonesian_stemmer/stemmer_utility_spec.rb
|
105
|
+
- spec/spec_helper.rb
|