ruby-stemmer 0.5.1 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,86 @@
1
+ extconf.rb
2
+ libstemmer_c/examples/stemwords.c
3
+ libstemmer_c/include/libstemmer.h
4
+ libstemmer_c/libstemmer/libstemmer.c
5
+ libstemmer_c/libstemmer/libstemmer_utf8.c
6
+ libstemmer_c/libstemmer/modules.h
7
+ libstemmer_c/libstemmer/modules.txt
8
+ libstemmer_c/libstemmer/modules_utf8.h
9
+ libstemmer_c/libstemmer/modules_utf8.txt
10
+ libstemmer_c/Makefile
11
+ libstemmer_c/MANIFEST
12
+ libstemmer_c/mkinc.mak
13
+ libstemmer_c/mkinc_utf8.mak
14
+ libstemmer_c/README
15
+ libstemmer_c/runtime/api.c
16
+ libstemmer_c/runtime/api.h
17
+ libstemmer_c/runtime/header.h
18
+ libstemmer_c/runtime/utilities.c
19
+ libstemmer_c/src_c/stem_ISO_8859_1_danish.c
20
+ libstemmer_c/src_c/stem_ISO_8859_1_danish.h
21
+ libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
22
+ libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
23
+ libstemmer_c/src_c/stem_ISO_8859_1_english.c
24
+ libstemmer_c/src_c/stem_ISO_8859_1_english.h
25
+ libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
26
+ libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
27
+ libstemmer_c/src_c/stem_ISO_8859_1_french.c
28
+ libstemmer_c/src_c/stem_ISO_8859_1_french.h
29
+ libstemmer_c/src_c/stem_ISO_8859_1_german.c
30
+ libstemmer_c/src_c/stem_ISO_8859_1_german.h
31
+ libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
32
+ libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
33
+ libstemmer_c/src_c/stem_ISO_8859_1_italian.c
34
+ libstemmer_c/src_c/stem_ISO_8859_1_italian.h
35
+ libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
36
+ libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
37
+ libstemmer_c/src_c/stem_ISO_8859_1_porter.c
38
+ libstemmer_c/src_c/stem_ISO_8859_1_porter.h
39
+ libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
40
+ libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
41
+ libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
42
+ libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
43
+ libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
44
+ libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
45
+ libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
46
+ libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
47
+ libstemmer_c/src_c/stem_KOI8_R_russian.c
48
+ libstemmer_c/src_c/stem_KOI8_R_russian.h
49
+ libstemmer_c/src_c/stem_UTF_8_danish.c
50
+ libstemmer_c/src_c/stem_UTF_8_danish.h
51
+ libstemmer_c/src_c/stem_UTF_8_dutch.c
52
+ libstemmer_c/src_c/stem_UTF_8_dutch.h
53
+ libstemmer_c/src_c/stem_UTF_8_english.c
54
+ libstemmer_c/src_c/stem_UTF_8_english.h
55
+ libstemmer_c/src_c/stem_UTF_8_finnish.c
56
+ libstemmer_c/src_c/stem_UTF_8_finnish.h
57
+ libstemmer_c/src_c/stem_UTF_8_french.c
58
+ libstemmer_c/src_c/stem_UTF_8_french.h
59
+ libstemmer_c/src_c/stem_UTF_8_german.c
60
+ libstemmer_c/src_c/stem_UTF_8_german.h
61
+ libstemmer_c/src_c/stem_UTF_8_hungarian.c
62
+ libstemmer_c/src_c/stem_UTF_8_hungarian.h
63
+ libstemmer_c/src_c/stem_UTF_8_italian.c
64
+ libstemmer_c/src_c/stem_UTF_8_italian.h
65
+ libstemmer_c/src_c/stem_UTF_8_norwegian.c
66
+ libstemmer_c/src_c/stem_UTF_8_norwegian.h
67
+ libstemmer_c/src_c/stem_UTF_8_porter.c
68
+ libstemmer_c/src_c/stem_UTF_8_porter.h
69
+ libstemmer_c/src_c/stem_UTF_8_portuguese.c
70
+ libstemmer_c/src_c/stem_UTF_8_portuguese.h
71
+ libstemmer_c/src_c/stem_UTF_8_romanian.c
72
+ libstemmer_c/src_c/stem_UTF_8_romanian.h
73
+ libstemmer_c/src_c/stem_UTF_8_russian.c
74
+ libstemmer_c/src_c/stem_UTF_8_russian.h
75
+ libstemmer_c/src_c/stem_UTF_8_spanish.c
76
+ libstemmer_c/src_c/stem_UTF_8_spanish.h
77
+ libstemmer_c/src_c/stem_UTF_8_swedish.c
78
+ libstemmer_c/src_c/stem_UTF_8_swedish.h
79
+ libstemmer_c/src_c/stem_UTF_8_turkish.c
80
+ libstemmer_c/src_c/stem_UTF_8_turkish.h
81
+ MIT-LICENSE
82
+ Rakefile
83
+ README
84
+ ruby-stemmer.c
85
+ test.rb
86
+ Manifest
data/Rakefile CHANGED
@@ -4,7 +4,19 @@
4
4
 
5
5
  require 'rubygems'
6
6
  require 'rake'
7
- require 'rake/gempackagetask'
7
+ require 'echoe'
8
+
9
+ Echoe.new('ruby-stemmer', '0.5.3') do |p|
10
+ p.description = "Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1"
11
+ p.url = "http://github.com/yury/ruby-stemmer"
12
+ p.author = "Yury Korolev"
13
+ p.email = "yury.korolev@gmail.com"
14
+ p.extensions = ["extconf.rb"]
15
+ p.ignore_pattern = ["*.o", "**/*.o", "stemwords", "*.bundle", "*.a", "*.so"]
16
+ p.development_dependencies = []
17
+ p.runtime_dependencies = []
18
+ p.has_rdoc = true
19
+ end
8
20
 
9
21
  rm_rf 'Makefile'
10
22
 
@@ -23,35 +35,6 @@ PKG_FILES.exclude('*.bundle')
23
35
  PKG_FILES.exclude('*.a')
24
36
  PKG_FILES.exclude('*.so')
25
37
 
26
- spec = Gem::Specification.new do | s |
27
- s.name = 'ruby-stemmer'
28
- s.version = '0.5.1'
29
- s.summary = "Stemmer implementation to ruby using SnowBall API from libstemmer_c."
30
- s.description = <<-EOF
31
- Stemmer implementation to ruby using libstemmer_c.
32
- EOF
33
-
34
- s.files = PKG_FILES.to_a
35
- s.extensions << "extconf.rb"
36
- s.has_rdoc = true
37
- s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
38
- '--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
39
- '--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
40
- s.author = "Aurelian Oancea"
41
- s.email = "aurelian@locknet.ro"
42
- s.homepage = "http://nrr.rubyforge.org"
43
- s.rubyforge_project = "nrr"
44
- end
45
-
46
- pt = Rake::GemPackageTask.new(spec) do |p|
47
- p.need_tar = true
48
- p.need_zip = true
49
- end
50
-
51
- task :default do
52
- puts "Ok"
53
- end
54
-
55
38
  task :clean do
56
39
  `rm -rf Makefile mkmf.log ruby-stemmer.o stemmer.bundle stemmer.so`
57
40
  `cd libstemmer_c && make clean && cd ../`
@@ -0,0 +1,209 @@
1
+ /* This is a simple program which uses libstemmer to provide a command
2
+ * line interface for stemming using any of the algorithms provided.
3
+ */
4
+
5
+ #include <stdio.h>
6
+ #include <stdlib.h> /* for malloc, free */
7
+ #include <string.h> /* for memmove */
8
+ #include <ctype.h> /* for isupper, tolower */
9
+
10
+ #include "libstemmer.h"
11
+
12
+ const char * progname;
13
+ static int pretty = 1;
14
+
15
+ static void
16
+ stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
17
+ {
18
+ #define INC 10
19
+ int lim = INC;
20
+ sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
21
+
22
+ while(1) {
23
+ int ch = getc(f_in);
24
+ if (ch == EOF) {
25
+ free(b); return;
26
+ }
27
+ {
28
+ int i = 0;
29
+ int inlen = 0;
30
+ while(1) {
31
+ if (ch == '\n' || ch == EOF) break;
32
+ if (i == lim) {
33
+ sb_symbol * newb;
34
+ newb = (sb_symbol *)
35
+ realloc(b, (lim + INC) * sizeof(sb_symbol));
36
+ if (newb == 0) goto error;
37
+ b = newb;
38
+ lim = lim + INC;
39
+ }
40
+ /* Update count of utf-8 characters. */
41
+ if (ch < 0x80 || ch > 0xBF) inlen += 1;
42
+ /* force lower case: */
43
+ if (isupper(ch)) ch = tolower(ch);
44
+
45
+ b[i] = ch;
46
+ i++;
47
+ ch = getc(f_in);
48
+ }
49
+
50
+ {
51
+ const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
52
+ if (stemmed == NULL)
53
+ {
54
+ fprintf(stderr, "Out of memory");
55
+ exit(1);
56
+ }
57
+ else
58
+ {
59
+ if (pretty == 1) {
60
+ fwrite(b, i, 1, f_out);
61
+ fputs(" -> ", f_out);
62
+ } else if (pretty == 2) {
63
+ fwrite(b, i, 1, f_out);
64
+ if (sb_stemmer_length(stemmer) > 0) {
65
+ int j;
66
+ if (inlen < 30) {
67
+ for (j = 30 - inlen; j > 0; j--)
68
+ fputs(" ", f_out);
69
+ } else {
70
+ fputs("\n", f_out);
71
+ for (j = 30; j > 0; j--)
72
+ fputs(" ", f_out);
73
+ }
74
+ }
75
+ }
76
+
77
+ fputs((char *)stemmed, f_out);
78
+ putc('\n', f_out);
79
+ }
80
+ }
81
+ }
82
+ }
83
+ error:
84
+ if (b != 0) free(b);
85
+ return;
86
+ }
87
+
88
+ /** Display the command line syntax, and then exit.
89
+ * @param n The value to exit with.
90
+ */
91
+ static void
92
+ usage(int n)
93
+ {
94
+ printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
95
+ "\n"
96
+ "The input file consists of a list of words to be stemmed, one per\n"
97
+ "line. Words should be in lower case, but (for English) A-Z letters\n"
98
+ "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
99
+ "used.\n"
100
+ "\n"
101
+ "If -c is given, the argument is the character encoding of the input\n"
102
+ "and output files. If it is omitted, the UTF-8 encoding is used.\n"
103
+ "\n"
104
+ "If -p is given the output file consists of each word of the input\n"
105
+ "file followed by \"->\" followed by its stemmed equivalent.\n"
106
+ "If -p2 is given the output file is a two column layout containing\n"
107
+ "the input words in the first column and the stemmed eqivalents in\n"
108
+ "the second column.\n"
109
+ "Otherwise, the output file consists of the stemmed words, one per\n"
110
+ "line.\n"
111
+ "\n"
112
+ "-h displays this help\n",
113
+ progname);
114
+ exit(n);
115
+ }
116
+
117
+ int
118
+ main(int argc, char * argv[])
119
+ {
120
+ char * in = 0;
121
+ char * out = 0;
122
+ FILE * f_in;
123
+ FILE * f_out;
124
+ struct sb_stemmer * stemmer;
125
+
126
+ char * language = "english";
127
+ char * charenc = NULL;
128
+
129
+ char * s;
130
+ int i = 1;
131
+ pretty = 0;
132
+
133
+ progname = argv[0];
134
+
135
+ while(i < argc) {
136
+ s = argv[i++];
137
+ if (s[0] == '-') {
138
+ if (strcmp(s, "-o") == 0) {
139
+ if (i >= argc) {
140
+ fprintf(stderr, "%s requires an argument\n", s);
141
+ exit(1);
142
+ }
143
+ out = argv[i++];
144
+ } else if (strcmp(s, "-i") == 0) {
145
+ if (i >= argc) {
146
+ fprintf(stderr, "%s requires an argument\n", s);
147
+ exit(1);
148
+ }
149
+ in = argv[i++];
150
+ } else if (strcmp(s, "-l") == 0) {
151
+ if (i >= argc) {
152
+ fprintf(stderr, "%s requires an argument\n", s);
153
+ exit(1);
154
+ }
155
+ language = argv[i++];
156
+ } else if (strcmp(s, "-c") == 0) {
157
+ if (i >= argc) {
158
+ fprintf(stderr, "%s requires an argument\n", s);
159
+ exit(1);
160
+ }
161
+ charenc = argv[i++];
162
+ } else if (strcmp(s, "-p2") == 0) {
163
+ pretty = 2;
164
+ } else if (strcmp(s, "-p") == 0) {
165
+ pretty = 1;
166
+ } else if (strcmp(s, "-h") == 0) {
167
+ usage(0);
168
+ } else {
169
+ fprintf(stderr, "option %s unknown\n", s);
170
+ usage(1);
171
+ }
172
+ } else {
173
+ fprintf(stderr, "unexpected parameter %s\n", s);
174
+ usage(1);
175
+ }
176
+ }
177
+
178
+ /* prepare the files */
179
+ f_in = (in == 0) ? stdin : fopen(in, "r");
180
+ if (f_in == 0) {
181
+ fprintf(stderr, "file %s not found\n", in);
182
+ exit(1);
183
+ }
184
+ f_out = (out == 0) ? stdout : fopen(out, "w");
185
+ if (f_out == 0) {
186
+ fprintf(stderr, "file %s cannot be opened\n", out);
187
+ exit(1);
188
+ }
189
+
190
+ /* do the stemming process: */
191
+ stemmer = sb_stemmer_new(language, charenc);
192
+ if (stemmer == 0) {
193
+ if (charenc == NULL) {
194
+ fprintf(stderr, "language `%s' not available for stemming\n", language);
195
+ exit(1);
196
+ } else {
197
+ fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
198
+ exit(1);
199
+ }
200
+ }
201
+ stem_file(stemmer, f_in, f_out);
202
+ sb_stemmer_delete(stemmer);
203
+
204
+ if (in != 0) (void) fclose(f_in);
205
+ if (out != 0) (void) fclose(f_out);
206
+
207
+ return 0;
208
+ }
209
+
@@ -54,15 +54,15 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
54
54
  renc = rb_str_new2("UTF_8");
55
55
  }
56
56
 
57
- stemmer = sb_stemmer_new( RSTRING(rlang)->ptr, RSTRING(renc)->ptr );
57
+ stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
58
58
  if (stemmer == 0) {
59
59
  // printf(">>[libstemmer]: got a null stemmer!\n");
60
60
  if (renc == 0 ) {
61
- rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING(rlang)->ptr);
61
+ rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING_PTR(rlang));
62
62
  exit(1);
63
63
  } else {
64
64
  rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
65
- RSTRING(rlang)->ptr, RSTRING(renc)->ptr);
65
+ RSTRING_PTR(rlang), RSTRING_PTR(renc));
66
66
  exit(1);
67
67
  }
68
68
  }
@@ -70,8 +70,8 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
70
70
  sb_data = ALLOC(struct sb_stemmer_data);
71
71
  DATA_PTR(self) = sb_data;
72
72
  sb_data->stemmer= stemmer;
73
- sb_data->lang = RSTRING(rlang)->ptr;
74
- sb_data->enc = RSTRING(renc)->ptr;
73
+ sb_data->lang = RSTRING_PTR(rlang);
74
+ sb_data->enc = RSTRING_PTR(renc);
75
75
 
76
76
  return self;
77
77
  }
@@ -91,8 +91,8 @@ rb_stemmer_stem(VALUE self, VALUE word) {
91
91
  struct sb_stemmer_data * sb_data;
92
92
  const sb_symbol * stemmed;
93
93
  GetStemmer(self, sb_data);
94
- stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING(word)->ptr, RSTRING(word)->len);
95
- // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed);
94
+ stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(word), RSTRING_LEN(word));
95
+ // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING_PTR(word), stemmed);
96
96
  return rb_str_new2((char *)stemmed);
97
97
  }
98
98
 
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{ruby-stemmer}
5
+ s.version = "0.5.3"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Yury Korolev"]
9
+ s.date = %q{2009-02-10}
10
+ s.description = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
11
+ s.email = %q{yury.korolev@gmail.com}
12
+ s.extensions = ["extconf.rb"]
13
+ s.extra_rdoc_files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "README"]
14
+ s.files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "MIT-LICENSE", "Rakefile", "README", "ruby-stemmer.c", "test.rb", "Manifest", "ruby-stemmer.gemspec"]
15
+ s.has_rdoc = true
16
+ s.homepage = %q{http://github.com/yury/ruby-stemmer}
17
+ s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ruby-stemmer", "--main", "README"]
18
+ s.require_paths = ["lib"]
19
+ s.rubyforge_project = %q{ruby-stemmer}
20
+ s.rubygems_version = %q{1.3.1}
21
+ s.summary = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
22
+
23
+ if s.respond_to? :specification_version then
24
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
25
+ s.specification_version = 2
26
+
27
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
28
+ else
29
+ end
30
+ else
31
+ end
32
+ end
data/test.rb CHANGED
@@ -1,11 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
-
2
+ # coding:utf-8
3
3
  #
4
4
  # $Id: test.rb 21 2008-04-30 10:57:37Z aurelian $
5
5
  #
6
6
 
7
- $kcode = "utf-8"
8
-
9
7
  require "stemmer.so"
10
8
 
11
9
  #puts "installation".stem
metadata CHANGED
@@ -1,37 +1,110 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
- - Aurelian Oancea
7
+ - Yury Korolev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-10 00:00:00 +02:00
12
+ date: 2009-02-10 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description: Stemmer implementation to ruby using libstemmer_c.
17
- email: aurelian@locknet.ro
16
+ description: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
17
+ email: yury.korolev@gmail.com
18
18
  executables: []
19
19
 
20
20
  extensions:
21
21
  - extconf.rb
22
- extra_rdoc_files: []
23
-
24
- files:
22
+ extra_rdoc_files:
25
23
  - extconf.rb
26
- - ruby-stemmer.c
27
- - test.rb
28
- - MIT-LICENSE
29
- - Rakefile
24
+ - libstemmer_c/examples/stemwords.c
25
+ - libstemmer_c/include/libstemmer.h
26
+ - libstemmer_c/libstemmer/libstemmer.c
27
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
28
+ - libstemmer_c/libstemmer/modules.h
29
+ - libstemmer_c/libstemmer/modules.txt
30
+ - libstemmer_c/libstemmer/modules_utf8.h
31
+ - libstemmer_c/libstemmer/modules_utf8.txt
32
+ - libstemmer_c/Makefile
33
+ - libstemmer_c/MANIFEST
34
+ - libstemmer_c/mkinc.mak
35
+ - libstemmer_c/mkinc_utf8.mak
36
+ - libstemmer_c/README
37
+ - libstemmer_c/runtime/api.c
38
+ - libstemmer_c/runtime/api.h
39
+ - libstemmer_c/runtime/header.h
40
+ - libstemmer_c/runtime/utilities.c
41
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
42
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
43
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
44
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
45
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
46
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
47
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
48
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
49
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
50
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
51
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
52
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
53
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
54
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
55
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
56
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
57
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
58
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
59
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
60
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
61
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
62
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
63
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
64
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
65
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
66
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
67
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
68
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
69
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
70
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
71
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
72
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
73
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
74
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
75
+ - libstemmer_c/src_c/stem_UTF_8_english.c
76
+ - libstemmer_c/src_c/stem_UTF_8_english.h
77
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
78
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
79
+ - libstemmer_c/src_c/stem_UTF_8_french.c
80
+ - libstemmer_c/src_c/stem_UTF_8_french.h
81
+ - libstemmer_c/src_c/stem_UTF_8_german.c
82
+ - libstemmer_c/src_c/stem_UTF_8_german.h
83
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
84
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
85
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
86
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
87
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
88
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
89
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
90
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
91
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
92
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
93
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
94
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
95
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
96
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
97
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
98
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
99
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
100
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
101
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
102
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
30
103
  - README
31
- - libstemmer_c/examples
32
- - libstemmer_c/include
104
+ files:
105
+ - extconf.rb
106
+ - libstemmer_c/examples/stemwords.c
33
107
  - libstemmer_c/include/libstemmer.h
34
- - libstemmer_c/libstemmer
35
108
  - libstemmer_c/libstemmer/libstemmer.c
36
109
  - libstemmer_c/libstemmer/libstemmer_utf8.c
37
110
  - libstemmer_c/libstemmer/modules.h
@@ -43,12 +116,10 @@ files:
43
116
  - libstemmer_c/mkinc.mak
44
117
  - libstemmer_c/mkinc_utf8.mak
45
118
  - libstemmer_c/README
46
- - libstemmer_c/runtime
47
119
  - libstemmer_c/runtime/api.c
48
120
  - libstemmer_c/runtime/api.h
49
121
  - libstemmer_c/runtime/header.h
50
122
  - libstemmer_c/runtime/utilities.c
51
- - libstemmer_c/src_c
52
123
  - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
53
124
  - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
54
125
  - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
@@ -111,21 +182,21 @@ files:
111
182
  - libstemmer_c/src_c/stem_UTF_8_swedish.h
112
183
  - libstemmer_c/src_c/stem_UTF_8_turkish.c
113
184
  - libstemmer_c/src_c/stem_UTF_8_turkish.h
185
+ - MIT-LICENSE
186
+ - Rakefile
187
+ - README
188
+ - ruby-stemmer.c
189
+ - test.rb
190
+ - Manifest
191
+ - ruby-stemmer.gemspec
114
192
  has_rdoc: true
115
- homepage: http://nrr.rubyforge.org
193
+ homepage: http://github.com/yury/ruby-stemmer
116
194
  post_install_message:
117
195
  rdoc_options:
118
- - --title
119
- - Ruby-Stemmer
120
- - --exclude
121
- - libstemmer_c
122
- - --exclude
123
- - extconf.rb
124
- - --exclude
125
- - test.rb
196
+ - --line-numbers
126
197
  - --inline-source
127
- - ruby-stemmer.c
128
- - README
198
+ - --title
199
+ - Ruby-stemmer
129
200
  - --main
130
201
  - README
131
202
  require_paths:
@@ -140,14 +211,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
211
  requirements:
141
212
  - - ">="
142
213
  - !ruby/object:Gem::Version
143
- version: "0"
214
+ version: "1.2"
144
215
  version:
145
216
  requirements: []
146
217
 
147
- rubyforge_project: nrr
148
- rubygems_version: 1.0.1
218
+ rubyforge_project: ruby-stemmer
219
+ rubygems_version: 1.3.1
149
220
  signing_key:
150
221
  specification_version: 2
151
- summary: Stemmer implementation to ruby using SnowBall API from libstemmer_c.
222
+ summary: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
152
223
  test_files: []
153
224