ruby-stemmer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +20 -22
- data/Rakefile +5 -3
- data/ruby-stemmer.c +33 -2
- metadata +17 -5
data/README
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
--
|
1
2
|
$Id: README 17 2008-01-07 16:59:10Z aurelian $
|
3
|
+
++
|
2
4
|
|
3
5
|
== About
|
4
6
|
|
@@ -46,34 +48,30 @@ see test.rb
|
|
46
48
|
|
47
49
|
== API
|
48
50
|
|
49
|
-
module Lingua
|
51
|
+
module Lingua
|
52
|
+
class Steemer
|
50
53
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
def initialize
|
57
|
-
end
|
54
|
+
# creates a new Steemer,
|
55
|
+
# defaults: language => en, encoding => UTF_8
|
56
|
+
# pass :language or :encoding to change them
|
57
|
+
def initialize
|
58
|
+
end
|
58
59
|
|
59
|
-
|
60
|
-
|
61
|
-
|
60
|
+
# stemms the word
|
61
|
+
def stem(word)
|
62
|
+
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
# gets the length of the last stemmed word
|
65
|
+
# same as:
|
66
|
+
# word = Lingua::Steemer.new.stem("installation") # ==> install (string)
|
67
|
+
# word.length # ==> 6 (int)
|
68
|
+
def length
|
69
|
+
end
|
68
70
|
end
|
69
|
-
|
70
71
|
end
|
71
72
|
|
72
|
-
end
|
73
|
-
|
74
73
|
== Todo
|
75
74
|
|
76
|
-
|
77
|
-
|
78
|
-
# Windows?
|
75
|
+
* Add (Array of Hashes) Lingua::Stemmer.list to list available languages/encodings
|
76
|
+
* Windows?
|
79
77
|
|
data/Rakefile
CHANGED
@@ -25,7 +25,7 @@ PKG_FILES.exclude('*.so')
|
|
25
25
|
|
26
26
|
spec = Gem::Specification.new do | s |
|
27
27
|
s.name = 'ruby-stemmer'
|
28
|
-
s.version = '0.0.1'
|
28
|
+
s.version = '0.0.2'
|
29
29
|
s.summary = "Stemmer implementation to ruby using libstemmer_c."
|
30
30
|
s.description = <<-EOF
|
31
31
|
Stemmer implementation to ruby using libstemmer_c.
|
@@ -33,8 +33,10 @@ spec = Gem::Specification.new do | s |
|
|
33
33
|
|
34
34
|
s.files = PKG_FILES.to_a
|
35
35
|
s.extensions << "extconf.rb"
|
36
|
-
s.has_rdoc =
|
37
|
-
|
36
|
+
s.has_rdoc = true
|
37
|
+
s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
|
38
|
+
'--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
|
39
|
+
'--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
|
38
40
|
s.author = "Aurelian Oancea"
|
39
41
|
s.email = "aurelian@locknet.ro"
|
40
42
|
s.homepage = "http://nrr.rubyforge.org"
|
data/ruby-stemmer.c
CHANGED
@@ -18,6 +18,15 @@ struct sb_stemmer_data {
|
|
18
18
|
const char * enc;
|
19
19
|
};
|
20
20
|
|
21
|
+
/*
|
22
|
+
* Document-method: new
|
23
|
+
* call-seq: Lingua::Stemmer.new
|
24
|
+
*
|
25
|
+
* Creates a new Stemmer, pass <tt>:language</tt> and <tt>:encoding</tt> as arguments
|
26
|
+
* to change encoding or language, otherwise english with UTF_8 will be used
|
27
|
+
*
|
28
|
+
* s = Lingua::Stemmer.new :language => 'fr'
|
29
|
+
*/
|
21
30
|
static VALUE
|
22
31
|
rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
23
32
|
VALUE roptions, rlang, renc;
|
@@ -51,7 +60,7 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
51
60
|
rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING(rlang)->ptr);
|
52
61
|
exit(1);
|
53
62
|
} else {
|
54
|
-
rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding",
|
63
|
+
rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
|
55
64
|
RSTRING(rlang)->ptr, RSTRING(renc)->ptr);
|
56
65
|
exit(1);
|
57
66
|
}
|
@@ -66,16 +75,35 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
66
75
|
return self;
|
67
76
|
}
|
68
77
|
|
78
|
+
/*
|
79
|
+
* Document-method: stem
|
80
|
+
* call-seq: stem
|
81
|
+
*
|
82
|
+
* Stems a word
|
83
|
+
*
|
84
|
+
* s = Lingua::Stemmer.new
|
85
|
+
* s.stem "installation" # ==> install
|
86
|
+
*/
|
69
87
|
static VALUE
|
70
88
|
rb_stemmer_stem(VALUE self, VALUE word) {
|
71
89
|
struct sb_stemmer_data * sb_data;
|
72
90
|
const sb_symbol * stemmed;
|
73
91
|
GetStemmer(self, sb_data);
|
74
92
|
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING(word)->ptr, RSTRING(word)->len);
|
75
|
-
|
93
|
+
// printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed);
|
76
94
|
return rb_str_new2((char *)stemmed);
|
77
95
|
}
|
78
96
|
|
97
|
+
/*
|
98
|
+
* Document-method: length
|
99
|
+
* call-seq: length
|
100
|
+
*
|
101
|
+
* Gets the length of the last stemmed word
|
102
|
+
*
|
103
|
+
* s = Lingua::Stemmer.new
|
104
|
+
* s.stem "installation"
|
105
|
+
* s.length # ==> 6
|
106
|
+
*/
|
79
107
|
static VALUE
|
80
108
|
rb_stemmer_length(VALUE self) {
|
81
109
|
struct sb_stemmer_data * sb_data;
|
@@ -97,6 +125,9 @@ sb_stemmer_alloc(VALUE klass)
|
|
97
125
|
return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0);
|
98
126
|
}
|
99
127
|
|
128
|
+
/*
|
129
|
+
* ruby-stemmer, ruby extension to SnowBall API
|
130
|
+
*/
|
100
131
|
void Init_stemmer() {
|
101
132
|
rb_mLingua = rb_define_module("Lingua");
|
102
133
|
rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aurelian Oancea
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-08 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -111,11 +111,23 @@ files:
|
|
111
111
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
112
112
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
113
113
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
114
|
-
has_rdoc:
|
114
|
+
has_rdoc: true
|
115
115
|
homepage: http://nrr.rubyforge.org
|
116
116
|
post_install_message:
|
117
|
-
rdoc_options:
|
118
|
-
|
117
|
+
rdoc_options:
|
118
|
+
- --title
|
119
|
+
- Ruby-Stemmer
|
120
|
+
- --exclude
|
121
|
+
- libstemmer_c
|
122
|
+
- --exclude
|
123
|
+
- extconf.rb
|
124
|
+
- --exclude
|
125
|
+
- test.rb
|
126
|
+
- --inline-source
|
127
|
+
- ruby-stemmer.c
|
128
|
+
- README
|
129
|
+
- --main
|
130
|
+
- README
|
119
131
|
require_paths:
|
120
132
|
- lib
|
121
133
|
required_ruby_version: !ruby/object:Gem::Requirement
|