ruby-stemmer 0.5.1 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +86 -0
- data/Rakefile +13 -30
- data/libstemmer_c/examples/stemwords.c +209 -0
- data/ruby-stemmer.c +7 -7
- data/ruby-stemmer.gemspec +32 -0
- data/test.rb +1 -3
- metadata +103 -32
data/Manifest
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
extconf.rb
|
2
|
+
libstemmer_c/examples/stemwords.c
|
3
|
+
libstemmer_c/include/libstemmer.h
|
4
|
+
libstemmer_c/libstemmer/libstemmer.c
|
5
|
+
libstemmer_c/libstemmer/libstemmer_utf8.c
|
6
|
+
libstemmer_c/libstemmer/modules.h
|
7
|
+
libstemmer_c/libstemmer/modules.txt
|
8
|
+
libstemmer_c/libstemmer/modules_utf8.h
|
9
|
+
libstemmer_c/libstemmer/modules_utf8.txt
|
10
|
+
libstemmer_c/Makefile
|
11
|
+
libstemmer_c/MANIFEST
|
12
|
+
libstemmer_c/mkinc.mak
|
13
|
+
libstemmer_c/mkinc_utf8.mak
|
14
|
+
libstemmer_c/README
|
15
|
+
libstemmer_c/runtime/api.c
|
16
|
+
libstemmer_c/runtime/api.h
|
17
|
+
libstemmer_c/runtime/header.h
|
18
|
+
libstemmer_c/runtime/utilities.c
|
19
|
+
libstemmer_c/src_c/stem_ISO_8859_1_danish.c
|
20
|
+
libstemmer_c/src_c/stem_ISO_8859_1_danish.h
|
21
|
+
libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
|
22
|
+
libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
|
23
|
+
libstemmer_c/src_c/stem_ISO_8859_1_english.c
|
24
|
+
libstemmer_c/src_c/stem_ISO_8859_1_english.h
|
25
|
+
libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
|
26
|
+
libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
|
27
|
+
libstemmer_c/src_c/stem_ISO_8859_1_french.c
|
28
|
+
libstemmer_c/src_c/stem_ISO_8859_1_french.h
|
29
|
+
libstemmer_c/src_c/stem_ISO_8859_1_german.c
|
30
|
+
libstemmer_c/src_c/stem_ISO_8859_1_german.h
|
31
|
+
libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
|
32
|
+
libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
|
33
|
+
libstemmer_c/src_c/stem_ISO_8859_1_italian.c
|
34
|
+
libstemmer_c/src_c/stem_ISO_8859_1_italian.h
|
35
|
+
libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
|
36
|
+
libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
|
37
|
+
libstemmer_c/src_c/stem_ISO_8859_1_porter.c
|
38
|
+
libstemmer_c/src_c/stem_ISO_8859_1_porter.h
|
39
|
+
libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
|
40
|
+
libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
|
41
|
+
libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
|
42
|
+
libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
|
43
|
+
libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
|
44
|
+
libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
|
45
|
+
libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
|
46
|
+
libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
|
47
|
+
libstemmer_c/src_c/stem_KOI8_R_russian.c
|
48
|
+
libstemmer_c/src_c/stem_KOI8_R_russian.h
|
49
|
+
libstemmer_c/src_c/stem_UTF_8_danish.c
|
50
|
+
libstemmer_c/src_c/stem_UTF_8_danish.h
|
51
|
+
libstemmer_c/src_c/stem_UTF_8_dutch.c
|
52
|
+
libstemmer_c/src_c/stem_UTF_8_dutch.h
|
53
|
+
libstemmer_c/src_c/stem_UTF_8_english.c
|
54
|
+
libstemmer_c/src_c/stem_UTF_8_english.h
|
55
|
+
libstemmer_c/src_c/stem_UTF_8_finnish.c
|
56
|
+
libstemmer_c/src_c/stem_UTF_8_finnish.h
|
57
|
+
libstemmer_c/src_c/stem_UTF_8_french.c
|
58
|
+
libstemmer_c/src_c/stem_UTF_8_french.h
|
59
|
+
libstemmer_c/src_c/stem_UTF_8_german.c
|
60
|
+
libstemmer_c/src_c/stem_UTF_8_german.h
|
61
|
+
libstemmer_c/src_c/stem_UTF_8_hungarian.c
|
62
|
+
libstemmer_c/src_c/stem_UTF_8_hungarian.h
|
63
|
+
libstemmer_c/src_c/stem_UTF_8_italian.c
|
64
|
+
libstemmer_c/src_c/stem_UTF_8_italian.h
|
65
|
+
libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
66
|
+
libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
67
|
+
libstemmer_c/src_c/stem_UTF_8_porter.c
|
68
|
+
libstemmer_c/src_c/stem_UTF_8_porter.h
|
69
|
+
libstemmer_c/src_c/stem_UTF_8_portuguese.c
|
70
|
+
libstemmer_c/src_c/stem_UTF_8_portuguese.h
|
71
|
+
libstemmer_c/src_c/stem_UTF_8_romanian.c
|
72
|
+
libstemmer_c/src_c/stem_UTF_8_romanian.h
|
73
|
+
libstemmer_c/src_c/stem_UTF_8_russian.c
|
74
|
+
libstemmer_c/src_c/stem_UTF_8_russian.h
|
75
|
+
libstemmer_c/src_c/stem_UTF_8_spanish.c
|
76
|
+
libstemmer_c/src_c/stem_UTF_8_spanish.h
|
77
|
+
libstemmer_c/src_c/stem_UTF_8_swedish.c
|
78
|
+
libstemmer_c/src_c/stem_UTF_8_swedish.h
|
79
|
+
libstemmer_c/src_c/stem_UTF_8_turkish.c
|
80
|
+
libstemmer_c/src_c/stem_UTF_8_turkish.h
|
81
|
+
MIT-LICENSE
|
82
|
+
Rakefile
|
83
|
+
README
|
84
|
+
ruby-stemmer.c
|
85
|
+
test.rb
|
86
|
+
Manifest
|
data/Rakefile
CHANGED
@@ -4,7 +4,19 @@
|
|
4
4
|
|
5
5
|
require 'rubygems'
|
6
6
|
require 'rake'
|
7
|
-
require '
|
7
|
+
require 'echoe'
|
8
|
+
|
9
|
+
Echoe.new('ruby-stemmer', '0.5.3') do |p|
|
10
|
+
p.description = "Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1"
|
11
|
+
p.url = "http://github.com/yury/ruby-stemmer"
|
12
|
+
p.author = "Yury Korolev"
|
13
|
+
p.email = "yury.korolev@gmail.com"
|
14
|
+
p.extensions = ["extconf.rb"]
|
15
|
+
p.ignore_pattern = ["*.o", "**/*.o", "stemwords", "*.bundle", "*.a", "*.so"]
|
16
|
+
p.development_dependencies = []
|
17
|
+
p.runtime_dependencies = []
|
18
|
+
p.has_rdoc = true
|
19
|
+
end
|
8
20
|
|
9
21
|
rm_rf 'Makefile'
|
10
22
|
|
@@ -23,35 +35,6 @@ PKG_FILES.exclude('*.bundle')
|
|
23
35
|
PKG_FILES.exclude('*.a')
|
24
36
|
PKG_FILES.exclude('*.so')
|
25
37
|
|
26
|
-
spec = Gem::Specification.new do | s |
|
27
|
-
s.name = 'ruby-stemmer'
|
28
|
-
s.version = '0.5.1'
|
29
|
-
s.summary = "Stemmer implementation to ruby using SnowBall API from libstemmer_c."
|
30
|
-
s.description = <<-EOF
|
31
|
-
Stemmer implementation to ruby using libstemmer_c.
|
32
|
-
EOF
|
33
|
-
|
34
|
-
s.files = PKG_FILES.to_a
|
35
|
-
s.extensions << "extconf.rb"
|
36
|
-
s.has_rdoc = true
|
37
|
-
s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
|
38
|
-
'--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
|
39
|
-
'--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
|
40
|
-
s.author = "Aurelian Oancea"
|
41
|
-
s.email = "aurelian@locknet.ro"
|
42
|
-
s.homepage = "http://nrr.rubyforge.org"
|
43
|
-
s.rubyforge_project = "nrr"
|
44
|
-
end
|
45
|
-
|
46
|
-
pt = Rake::GemPackageTask.new(spec) do |p|
|
47
|
-
p.need_tar = true
|
48
|
-
p.need_zip = true
|
49
|
-
end
|
50
|
-
|
51
|
-
task :default do
|
52
|
-
puts "Ok"
|
53
|
-
end
|
54
|
-
|
55
38
|
task :clean do
|
56
39
|
`rm -rf Makefile mkmf.log ruby-stemmer.o stemmer.bundle stemmer.so`
|
57
40
|
`cd libstemmer_c && make clean && cd ../`
|
@@ -0,0 +1,209 @@
|
|
1
|
+
/* This is a simple program which uses libstemmer to provide a command
|
2
|
+
* line interface for stemming using any of the algorithms provided.
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include <stdio.h>
|
6
|
+
#include <stdlib.h> /* for malloc, free */
|
7
|
+
#include <string.h> /* for memmove */
|
8
|
+
#include <ctype.h> /* for isupper, tolower */
|
9
|
+
|
10
|
+
#include "libstemmer.h"
|
11
|
+
|
12
|
+
const char * progname;
|
13
|
+
static int pretty = 1;
|
14
|
+
|
15
|
+
static void
|
16
|
+
stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
|
17
|
+
{
|
18
|
+
#define INC 10
|
19
|
+
int lim = INC;
|
20
|
+
sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
|
21
|
+
|
22
|
+
while(1) {
|
23
|
+
int ch = getc(f_in);
|
24
|
+
if (ch == EOF) {
|
25
|
+
free(b); return;
|
26
|
+
}
|
27
|
+
{
|
28
|
+
int i = 0;
|
29
|
+
int inlen = 0;
|
30
|
+
while(1) {
|
31
|
+
if (ch == '\n' || ch == EOF) break;
|
32
|
+
if (i == lim) {
|
33
|
+
sb_symbol * newb;
|
34
|
+
newb = (sb_symbol *)
|
35
|
+
realloc(b, (lim + INC) * sizeof(sb_symbol));
|
36
|
+
if (newb == 0) goto error;
|
37
|
+
b = newb;
|
38
|
+
lim = lim + INC;
|
39
|
+
}
|
40
|
+
/* Update count of utf-8 characters. */
|
41
|
+
if (ch < 0x80 || ch > 0xBF) inlen += 1;
|
42
|
+
/* force lower case: */
|
43
|
+
if (isupper(ch)) ch = tolower(ch);
|
44
|
+
|
45
|
+
b[i] = ch;
|
46
|
+
i++;
|
47
|
+
ch = getc(f_in);
|
48
|
+
}
|
49
|
+
|
50
|
+
{
|
51
|
+
const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
|
52
|
+
if (stemmed == NULL)
|
53
|
+
{
|
54
|
+
fprintf(stderr, "Out of memory");
|
55
|
+
exit(1);
|
56
|
+
}
|
57
|
+
else
|
58
|
+
{
|
59
|
+
if (pretty == 1) {
|
60
|
+
fwrite(b, i, 1, f_out);
|
61
|
+
fputs(" -> ", f_out);
|
62
|
+
} else if (pretty == 2) {
|
63
|
+
fwrite(b, i, 1, f_out);
|
64
|
+
if (sb_stemmer_length(stemmer) > 0) {
|
65
|
+
int j;
|
66
|
+
if (inlen < 30) {
|
67
|
+
for (j = 30 - inlen; j > 0; j--)
|
68
|
+
fputs(" ", f_out);
|
69
|
+
} else {
|
70
|
+
fputs("\n", f_out);
|
71
|
+
for (j = 30; j > 0; j--)
|
72
|
+
fputs(" ", f_out);
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
fputs((char *)stemmed, f_out);
|
78
|
+
putc('\n', f_out);
|
79
|
+
}
|
80
|
+
}
|
81
|
+
}
|
82
|
+
}
|
83
|
+
error:
|
84
|
+
if (b != 0) free(b);
|
85
|
+
return;
|
86
|
+
}
|
87
|
+
|
88
|
+
/** Display the command line syntax, and then exit.
|
89
|
+
* @param n The value to exit with.
|
90
|
+
*/
|
91
|
+
static void
|
92
|
+
usage(int n)
|
93
|
+
{
|
94
|
+
printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
|
95
|
+
"\n"
|
96
|
+
"The input file consists of a list of words to be stemmed, one per\n"
|
97
|
+
"line. Words should be in lower case, but (for English) A-Z letters\n"
|
98
|
+
"are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
|
99
|
+
"used.\n"
|
100
|
+
"\n"
|
101
|
+
"If -c is given, the argument is the character encoding of the input\n"
|
102
|
+
"and output files. If it is omitted, the UTF-8 encoding is used.\n"
|
103
|
+
"\n"
|
104
|
+
"If -p is given the output file consists of each word of the input\n"
|
105
|
+
"file followed by \"->\" followed by its stemmed equivalent.\n"
|
106
|
+
"If -p2 is given the output file is a two column layout containing\n"
|
107
|
+
"the input words in the first column and the stemmed eqivalents in\n"
|
108
|
+
"the second column.\n"
|
109
|
+
"Otherwise, the output file consists of the stemmed words, one per\n"
|
110
|
+
"line.\n"
|
111
|
+
"\n"
|
112
|
+
"-h displays this help\n",
|
113
|
+
progname);
|
114
|
+
exit(n);
|
115
|
+
}
|
116
|
+
|
117
|
+
int
|
118
|
+
main(int argc, char * argv[])
|
119
|
+
{
|
120
|
+
char * in = 0;
|
121
|
+
char * out = 0;
|
122
|
+
FILE * f_in;
|
123
|
+
FILE * f_out;
|
124
|
+
struct sb_stemmer * stemmer;
|
125
|
+
|
126
|
+
char * language = "english";
|
127
|
+
char * charenc = NULL;
|
128
|
+
|
129
|
+
char * s;
|
130
|
+
int i = 1;
|
131
|
+
pretty = 0;
|
132
|
+
|
133
|
+
progname = argv[0];
|
134
|
+
|
135
|
+
while(i < argc) {
|
136
|
+
s = argv[i++];
|
137
|
+
if (s[0] == '-') {
|
138
|
+
if (strcmp(s, "-o") == 0) {
|
139
|
+
if (i >= argc) {
|
140
|
+
fprintf(stderr, "%s requires an argument\n", s);
|
141
|
+
exit(1);
|
142
|
+
}
|
143
|
+
out = argv[i++];
|
144
|
+
} else if (strcmp(s, "-i") == 0) {
|
145
|
+
if (i >= argc) {
|
146
|
+
fprintf(stderr, "%s requires an argument\n", s);
|
147
|
+
exit(1);
|
148
|
+
}
|
149
|
+
in = argv[i++];
|
150
|
+
} else if (strcmp(s, "-l") == 0) {
|
151
|
+
if (i >= argc) {
|
152
|
+
fprintf(stderr, "%s requires an argument\n", s);
|
153
|
+
exit(1);
|
154
|
+
}
|
155
|
+
language = argv[i++];
|
156
|
+
} else if (strcmp(s, "-c") == 0) {
|
157
|
+
if (i >= argc) {
|
158
|
+
fprintf(stderr, "%s requires an argument\n", s);
|
159
|
+
exit(1);
|
160
|
+
}
|
161
|
+
charenc = argv[i++];
|
162
|
+
} else if (strcmp(s, "-p2") == 0) {
|
163
|
+
pretty = 2;
|
164
|
+
} else if (strcmp(s, "-p") == 0) {
|
165
|
+
pretty = 1;
|
166
|
+
} else if (strcmp(s, "-h") == 0) {
|
167
|
+
usage(0);
|
168
|
+
} else {
|
169
|
+
fprintf(stderr, "option %s unknown\n", s);
|
170
|
+
usage(1);
|
171
|
+
}
|
172
|
+
} else {
|
173
|
+
fprintf(stderr, "unexpected parameter %s\n", s);
|
174
|
+
usage(1);
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
/* prepare the files */
|
179
|
+
f_in = (in == 0) ? stdin : fopen(in, "r");
|
180
|
+
if (f_in == 0) {
|
181
|
+
fprintf(stderr, "file %s not found\n", in);
|
182
|
+
exit(1);
|
183
|
+
}
|
184
|
+
f_out = (out == 0) ? stdout : fopen(out, "w");
|
185
|
+
if (f_out == 0) {
|
186
|
+
fprintf(stderr, "file %s cannot be opened\n", out);
|
187
|
+
exit(1);
|
188
|
+
}
|
189
|
+
|
190
|
+
/* do the stemming process: */
|
191
|
+
stemmer = sb_stemmer_new(language, charenc);
|
192
|
+
if (stemmer == 0) {
|
193
|
+
if (charenc == NULL) {
|
194
|
+
fprintf(stderr, "language `%s' not available for stemming\n", language);
|
195
|
+
exit(1);
|
196
|
+
} else {
|
197
|
+
fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
|
198
|
+
exit(1);
|
199
|
+
}
|
200
|
+
}
|
201
|
+
stem_file(stemmer, f_in, f_out);
|
202
|
+
sb_stemmer_delete(stemmer);
|
203
|
+
|
204
|
+
if (in != 0) (void) fclose(f_in);
|
205
|
+
if (out != 0) (void) fclose(f_out);
|
206
|
+
|
207
|
+
return 0;
|
208
|
+
}
|
209
|
+
|
data/ruby-stemmer.c
CHANGED
@@ -54,15 +54,15 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
54
54
|
renc = rb_str_new2("UTF_8");
|
55
55
|
}
|
56
56
|
|
57
|
-
stemmer = sb_stemmer_new(
|
57
|
+
stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
|
58
58
|
if (stemmer == 0) {
|
59
59
|
// printf(">>[libstemmer]: got a null stemmer!\n");
|
60
60
|
if (renc == 0 ) {
|
61
|
-
rb_raise(rb_eRuntimeError, "Language %s not available for stemming",
|
61
|
+
rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING_PTR(rlang));
|
62
62
|
exit(1);
|
63
63
|
} else {
|
64
64
|
rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
|
65
|
-
|
65
|
+
RSTRING_PTR(rlang), RSTRING_PTR(renc));
|
66
66
|
exit(1);
|
67
67
|
}
|
68
68
|
}
|
@@ -70,8 +70,8 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
70
70
|
sb_data = ALLOC(struct sb_stemmer_data);
|
71
71
|
DATA_PTR(self) = sb_data;
|
72
72
|
sb_data->stemmer= stemmer;
|
73
|
-
sb_data->lang =
|
74
|
-
sb_data->enc =
|
73
|
+
sb_data->lang = RSTRING_PTR(rlang);
|
74
|
+
sb_data->enc = RSTRING_PTR(renc);
|
75
75
|
|
76
76
|
return self;
|
77
77
|
}
|
@@ -91,8 +91,8 @@ rb_stemmer_stem(VALUE self, VALUE word) {
|
|
91
91
|
struct sb_stemmer_data * sb_data;
|
92
92
|
const sb_symbol * stemmed;
|
93
93
|
GetStemmer(self, sb_data);
|
94
|
-
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)
|
95
|
-
// printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc,
|
94
|
+
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(word), RSTRING_LEN(word));
|
95
|
+
// printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING_PTR(word), stemmed);
|
96
96
|
return rb_str_new2((char *)stemmed);
|
97
97
|
}
|
98
98
|
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{ruby-stemmer}
|
5
|
+
s.version = "0.5.3"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Yury Korolev"]
|
9
|
+
s.date = %q{2009-02-10}
|
10
|
+
s.description = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
|
11
|
+
s.email = %q{yury.korolev@gmail.com}
|
12
|
+
s.extensions = ["extconf.rb"]
|
13
|
+
s.extra_rdoc_files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "README"]
|
14
|
+
s.files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "MIT-LICENSE", "Rakefile", "README", "ruby-stemmer.c", "test.rb", "Manifest", "ruby-stemmer.gemspec"]
|
15
|
+
s.has_rdoc = true
|
16
|
+
s.homepage = %q{http://github.com/yury/ruby-stemmer}
|
17
|
+
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ruby-stemmer", "--main", "README"]
|
18
|
+
s.require_paths = ["lib"]
|
19
|
+
s.rubyforge_project = %q{ruby-stemmer}
|
20
|
+
s.rubygems_version = %q{1.3.1}
|
21
|
+
s.summary = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
|
22
|
+
|
23
|
+
if s.respond_to? :specification_version then
|
24
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
25
|
+
s.specification_version = 2
|
26
|
+
|
27
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
|
+
else
|
29
|
+
end
|
30
|
+
else
|
31
|
+
end
|
32
|
+
end
|
data/test.rb
CHANGED
metadata
CHANGED
@@ -1,37 +1,110 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- Yury Korolev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-02-10 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description: Stemmer implementation to ruby using libstemmer_c.
|
17
|
-
email:
|
16
|
+
description: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
|
17
|
+
email: yury.korolev@gmail.com
|
18
18
|
executables: []
|
19
19
|
|
20
20
|
extensions:
|
21
21
|
- extconf.rb
|
22
|
-
extra_rdoc_files:
|
23
|
-
|
24
|
-
files:
|
22
|
+
extra_rdoc_files:
|
25
23
|
- extconf.rb
|
26
|
-
-
|
27
|
-
-
|
28
|
-
-
|
29
|
-
-
|
24
|
+
- libstemmer_c/examples/stemwords.c
|
25
|
+
- libstemmer_c/include/libstemmer.h
|
26
|
+
- libstemmer_c/libstemmer/libstemmer.c
|
27
|
+
- libstemmer_c/libstemmer/libstemmer_utf8.c
|
28
|
+
- libstemmer_c/libstemmer/modules.h
|
29
|
+
- libstemmer_c/libstemmer/modules.txt
|
30
|
+
- libstemmer_c/libstemmer/modules_utf8.h
|
31
|
+
- libstemmer_c/libstemmer/modules_utf8.txt
|
32
|
+
- libstemmer_c/Makefile
|
33
|
+
- libstemmer_c/MANIFEST
|
34
|
+
- libstemmer_c/mkinc.mak
|
35
|
+
- libstemmer_c/mkinc_utf8.mak
|
36
|
+
- libstemmer_c/README
|
37
|
+
- libstemmer_c/runtime/api.c
|
38
|
+
- libstemmer_c/runtime/api.h
|
39
|
+
- libstemmer_c/runtime/header.h
|
40
|
+
- libstemmer_c/runtime/utilities.c
|
41
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_danish.c
|
42
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_danish.h
|
43
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
|
44
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
|
45
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_english.c
|
46
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_english.h
|
47
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
|
48
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
|
49
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_french.c
|
50
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_french.h
|
51
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_german.c
|
52
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_german.h
|
53
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
|
54
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
|
55
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_italian.c
|
56
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_italian.h
|
57
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
|
58
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
|
59
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_porter.c
|
60
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_porter.h
|
61
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
|
62
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
|
63
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
|
64
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
|
65
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
|
66
|
+
- libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
|
67
|
+
- libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
|
68
|
+
- libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
|
69
|
+
- libstemmer_c/src_c/stem_KOI8_R_russian.c
|
70
|
+
- libstemmer_c/src_c/stem_KOI8_R_russian.h
|
71
|
+
- libstemmer_c/src_c/stem_UTF_8_danish.c
|
72
|
+
- libstemmer_c/src_c/stem_UTF_8_danish.h
|
73
|
+
- libstemmer_c/src_c/stem_UTF_8_dutch.c
|
74
|
+
- libstemmer_c/src_c/stem_UTF_8_dutch.h
|
75
|
+
- libstemmer_c/src_c/stem_UTF_8_english.c
|
76
|
+
- libstemmer_c/src_c/stem_UTF_8_english.h
|
77
|
+
- libstemmer_c/src_c/stem_UTF_8_finnish.c
|
78
|
+
- libstemmer_c/src_c/stem_UTF_8_finnish.h
|
79
|
+
- libstemmer_c/src_c/stem_UTF_8_french.c
|
80
|
+
- libstemmer_c/src_c/stem_UTF_8_french.h
|
81
|
+
- libstemmer_c/src_c/stem_UTF_8_german.c
|
82
|
+
- libstemmer_c/src_c/stem_UTF_8_german.h
|
83
|
+
- libstemmer_c/src_c/stem_UTF_8_hungarian.c
|
84
|
+
- libstemmer_c/src_c/stem_UTF_8_hungarian.h
|
85
|
+
- libstemmer_c/src_c/stem_UTF_8_italian.c
|
86
|
+
- libstemmer_c/src_c/stem_UTF_8_italian.h
|
87
|
+
- libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
88
|
+
- libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
89
|
+
- libstemmer_c/src_c/stem_UTF_8_porter.c
|
90
|
+
- libstemmer_c/src_c/stem_UTF_8_porter.h
|
91
|
+
- libstemmer_c/src_c/stem_UTF_8_portuguese.c
|
92
|
+
- libstemmer_c/src_c/stem_UTF_8_portuguese.h
|
93
|
+
- libstemmer_c/src_c/stem_UTF_8_romanian.c
|
94
|
+
- libstemmer_c/src_c/stem_UTF_8_romanian.h
|
95
|
+
- libstemmer_c/src_c/stem_UTF_8_russian.c
|
96
|
+
- libstemmer_c/src_c/stem_UTF_8_russian.h
|
97
|
+
- libstemmer_c/src_c/stem_UTF_8_spanish.c
|
98
|
+
- libstemmer_c/src_c/stem_UTF_8_spanish.h
|
99
|
+
- libstemmer_c/src_c/stem_UTF_8_swedish.c
|
100
|
+
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
101
|
+
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
102
|
+
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
30
103
|
- README
|
31
|
-
|
32
|
-
-
|
104
|
+
files:
|
105
|
+
- extconf.rb
|
106
|
+
- libstemmer_c/examples/stemwords.c
|
33
107
|
- libstemmer_c/include/libstemmer.h
|
34
|
-
- libstemmer_c/libstemmer
|
35
108
|
- libstemmer_c/libstemmer/libstemmer.c
|
36
109
|
- libstemmer_c/libstemmer/libstemmer_utf8.c
|
37
110
|
- libstemmer_c/libstemmer/modules.h
|
@@ -43,12 +116,10 @@ files:
|
|
43
116
|
- libstemmer_c/mkinc.mak
|
44
117
|
- libstemmer_c/mkinc_utf8.mak
|
45
118
|
- libstemmer_c/README
|
46
|
-
- libstemmer_c/runtime
|
47
119
|
- libstemmer_c/runtime/api.c
|
48
120
|
- libstemmer_c/runtime/api.h
|
49
121
|
- libstemmer_c/runtime/header.h
|
50
122
|
- libstemmer_c/runtime/utilities.c
|
51
|
-
- libstemmer_c/src_c
|
52
123
|
- libstemmer_c/src_c/stem_ISO_8859_1_danish.c
|
53
124
|
- libstemmer_c/src_c/stem_ISO_8859_1_danish.h
|
54
125
|
- libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
|
@@ -111,21 +182,21 @@ files:
|
|
111
182
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
112
183
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
113
184
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
185
|
+
- MIT-LICENSE
|
186
|
+
- Rakefile
|
187
|
+
- README
|
188
|
+
- ruby-stemmer.c
|
189
|
+
- test.rb
|
190
|
+
- Manifest
|
191
|
+
- ruby-stemmer.gemspec
|
114
192
|
has_rdoc: true
|
115
|
-
homepage: http://
|
193
|
+
homepage: http://github.com/yury/ruby-stemmer
|
116
194
|
post_install_message:
|
117
195
|
rdoc_options:
|
118
|
-
- --
|
119
|
-
- Ruby-Stemmer
|
120
|
-
- --exclude
|
121
|
-
- libstemmer_c
|
122
|
-
- --exclude
|
123
|
-
- extconf.rb
|
124
|
-
- --exclude
|
125
|
-
- test.rb
|
196
|
+
- --line-numbers
|
126
197
|
- --inline-source
|
127
|
-
-
|
128
|
-
-
|
198
|
+
- --title
|
199
|
+
- Ruby-stemmer
|
129
200
|
- --main
|
130
201
|
- README
|
131
202
|
require_paths:
|
@@ -140,14 +211,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
211
|
requirements:
|
141
212
|
- - ">="
|
142
213
|
- !ruby/object:Gem::Version
|
143
|
-
version: "
|
214
|
+
version: "1.2"
|
144
215
|
version:
|
145
216
|
requirements: []
|
146
217
|
|
147
|
-
rubyforge_project:
|
148
|
-
rubygems_version: 1.
|
218
|
+
rubyforge_project: ruby-stemmer
|
219
|
+
rubygems_version: 1.3.1
|
149
220
|
signing_key:
|
150
221
|
specification_version: 2
|
151
|
-
summary: Stemmer implementation to ruby using
|
222
|
+
summary: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
|
152
223
|
test_files: []
|
153
224
|
|