ruby-stemmer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +20 -22
- data/Rakefile +5 -3
- data/ruby-stemmer.c +33 -2
- metadata +17 -5
data/README
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
--
|
1
2
|
$Id: README 17 2008-01-07 16:59:10Z aurelian $
|
3
|
+
++
|
2
4
|
|
3
5
|
== About
|
4
6
|
|
@@ -46,34 +48,30 @@ see test.rb
|
|
46
48
|
|
47
49
|
== API
|
48
50
|
|
49
|
-
module Lingua
|
51
|
+
module Lingua
|
52
|
+
class Stemmer
|
50
53
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
def initialize
|
57
|
-
end
|
54
|
+
# creates a new Stemmer,
|
55
|
+
# defaults: language => en, encoding => UTF_8
|
56
|
+
# pass :language or :encoding to change them
|
57
|
+
def initialize
|
58
|
+
end
|
58
59
|
|
59
|
-
|
60
|
-
|
61
|
-
|
60
|
+
# stems the word
|
61
|
+
def stem(word)
|
62
|
+
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
# gets the length of the last stemmed word
|
65
|
+
# same as:
|
66
|
+
# word = Lingua::Stemmer.new.stem("installation") # ==> install (string)
|
67
|
+
# word.length # ==> 6 (int)
|
68
|
+
def length
|
69
|
+
end
|
68
70
|
end
|
69
|
-
|
70
71
|
end
|
71
72
|
|
72
|
-
end
|
73
|
-
|
74
73
|
== Todo
|
75
74
|
|
76
|
-
|
77
|
-
|
78
|
-
# Windows?
|
75
|
+
* Add (Array of Hashes) Lingua::Stemmer.list to list available languages/encodings
|
76
|
+
* Windows?
|
79
77
|
|
data/Rakefile
CHANGED
@@ -25,7 +25,7 @@ PKG_FILES.exclude('*.so')
|
|
25
25
|
|
26
26
|
spec = Gem::Specification.new do | s |
|
27
27
|
s.name = 'ruby-stemmer'
|
28
|
-
s.version = '0.0.1'
|
28
|
+
s.version = '0.0.2'
|
29
29
|
s.summary = "Stemmer implementation to ruby using libstemmer_c."
|
30
30
|
s.description = <<-EOF
|
31
31
|
Stemmer implementation to ruby using libstemmer_c.
|
@@ -33,8 +33,10 @@ spec = Gem::Specification.new do | s |
|
|
33
33
|
|
34
34
|
s.files = PKG_FILES.to_a
|
35
35
|
s.extensions << "extconf.rb"
|
36
|
-
s.has_rdoc =
|
37
|
-
|
36
|
+
s.has_rdoc = true
|
37
|
+
s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
|
38
|
+
'--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
|
39
|
+
'--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
|
38
40
|
s.author = "Aurelian Oancea"
|
39
41
|
s.email = "aurelian@locknet.ro"
|
40
42
|
s.homepage = "http://nrr.rubyforge.org"
|
data/ruby-stemmer.c
CHANGED
@@ -18,6 +18,15 @@ struct sb_stemmer_data {
|
|
18
18
|
const char * enc;
|
19
19
|
};
|
20
20
|
|
21
|
+
/*
|
22
|
+
* Document-method: new
|
23
|
+
* call-seq: Lingua::Stemmer.new
|
24
|
+
*
|
25
|
+
* Creates a new Stemmer, pass <tt>:language</tt> and <tt>:encoding</tt> as arguments
|
26
|
+
* to change encoding or language, otherwise english with UTF_8 will be used
|
27
|
+
*
|
28
|
+
* s = Lingua::Stemmer.new :language => 'fr'
|
29
|
+
*/
|
21
30
|
static VALUE
|
22
31
|
rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
23
32
|
VALUE roptions, rlang, renc;
|
@@ -51,7 +60,7 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
51
60
|
rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING(rlang)->ptr);
|
52
61
|
exit(1);
|
53
62
|
} else {
|
54
|
-
rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding",
|
63
|
+
rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
|
55
64
|
RSTRING(rlang)->ptr, RSTRING(renc)->ptr);
|
56
65
|
exit(1);
|
57
66
|
}
|
@@ -66,16 +75,35 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
66
75
|
return self;
|
67
76
|
}
|
68
77
|
|
78
|
+
/*
|
79
|
+
* Document-method: stem
|
80
|
+
* call-seq: stem
|
81
|
+
*
|
82
|
+
* Stems a word
|
83
|
+
*
|
84
|
+
* s = Lingua::Stemmer.new
|
85
|
+
* s.stem "installation" # ==> install
|
86
|
+
*/
|
69
87
|
static VALUE
|
70
88
|
rb_stemmer_stem(VALUE self, VALUE word) {
|
71
89
|
struct sb_stemmer_data * sb_data;
|
72
90
|
const sb_symbol * stemmed;
|
73
91
|
GetStemmer(self, sb_data);
|
74
92
|
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING(word)->ptr, RSTRING(word)->len);
|
75
|
-
|
93
|
+
// printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed);
|
76
94
|
return rb_str_new2((char *)stemmed);
|
77
95
|
}
|
78
96
|
|
97
|
+
/*
|
98
|
+
* Document-method: length
|
99
|
+
* call-seq: length
|
100
|
+
*
|
101
|
+
* Gets the length of the last stemmed word
|
102
|
+
*
|
103
|
+
* s = Lingua::Stemmer.new
|
104
|
+
* s.stem "installation"
|
105
|
+
* s.length # ==> 6
|
106
|
+
*/
|
79
107
|
static VALUE
|
80
108
|
rb_stemmer_length(VALUE self) {
|
81
109
|
struct sb_stemmer_data * sb_data;
|
@@ -97,6 +125,9 @@ sb_stemmer_alloc(VALUE klass)
|
|
97
125
|
return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0);
|
98
126
|
}
|
99
127
|
|
128
|
+
/*
|
129
|
+
* ruby-stemmer, ruby extension to SnowBall API
|
130
|
+
*/
|
100
131
|
void Init_stemmer() {
|
101
132
|
rb_mLingua = rb_define_module("Lingua");
|
102
133
|
rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aurelian Oancea
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-01-
|
12
|
+
date: 2008-01-08 00:00:00 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -111,11 +111,23 @@ files:
|
|
111
111
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
112
112
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
113
113
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
114
|
-
has_rdoc:
|
114
|
+
has_rdoc: true
|
115
115
|
homepage: http://nrr.rubyforge.org
|
116
116
|
post_install_message:
|
117
|
-
rdoc_options:
|
118
|
-
|
117
|
+
rdoc_options:
|
118
|
+
- --title
|
119
|
+
- Ruby-Stemmer
|
120
|
+
- --exclude
|
121
|
+
- libstemmer_c
|
122
|
+
- --exclude
|
123
|
+
- extconf.rb
|
124
|
+
- --exclude
|
125
|
+
- test.rb
|
126
|
+
- --inline-source
|
127
|
+
- ruby-stemmer.c
|
128
|
+
- README
|
129
|
+
- --main
|
130
|
+
- README
|
119
131
|
require_paths:
|
120
132
|
- lib
|
121
133
|
required_ruby_version: !ruby/object:Gem::Requirement
|