ruby-stemmer 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ extconf.rb
2
+ libstemmer_c/examples/stemwords.c
3
+ libstemmer_c/include/libstemmer.h
4
+ libstemmer_c/libstemmer/libstemmer.c
5
+ libstemmer_c/libstemmer/libstemmer_utf8.c
6
+ libstemmer_c/libstemmer/modules.h
7
+ libstemmer_c/libstemmer/modules.txt
8
+ libstemmer_c/libstemmer/modules_utf8.h
9
+ libstemmer_c/libstemmer/modules_utf8.txt
10
+ libstemmer_c/Makefile
11
+ libstemmer_c/MANIFEST
12
+ libstemmer_c/mkinc.mak
13
+ libstemmer_c/mkinc_utf8.mak
14
+ libstemmer_c/README
15
+ libstemmer_c/runtime/api.c
16
+ libstemmer_c/runtime/api.h
17
+ libstemmer_c/runtime/header.h
18
+ libstemmer_c/runtime/utilities.c
19
+ libstemmer_c/src_c/stem_ISO_8859_1_danish.c
20
+ libstemmer_c/src_c/stem_ISO_8859_1_danish.h
21
+ libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
22
+ libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
23
+ libstemmer_c/src_c/stem_ISO_8859_1_english.c
24
+ libstemmer_c/src_c/stem_ISO_8859_1_english.h
25
+ libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
26
+ libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
27
+ libstemmer_c/src_c/stem_ISO_8859_1_french.c
28
+ libstemmer_c/src_c/stem_ISO_8859_1_french.h
29
+ libstemmer_c/src_c/stem_ISO_8859_1_german.c
30
+ libstemmer_c/src_c/stem_ISO_8859_1_german.h
31
+ libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
32
+ libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
33
+ libstemmer_c/src_c/stem_ISO_8859_1_italian.c
34
+ libstemmer_c/src_c/stem_ISO_8859_1_italian.h
35
+ libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
36
+ libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
37
+ libstemmer_c/src_c/stem_ISO_8859_1_porter.c
38
+ libstemmer_c/src_c/stem_ISO_8859_1_porter.h
39
+ libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
40
+ libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
41
+ libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
42
+ libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
43
+ libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
44
+ libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
45
+ libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
46
+ libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
47
+ libstemmer_c/src_c/stem_KOI8_R_russian.c
48
+ libstemmer_c/src_c/stem_KOI8_R_russian.h
49
+ libstemmer_c/src_c/stem_UTF_8_danish.c
50
+ libstemmer_c/src_c/stem_UTF_8_danish.h
51
+ libstemmer_c/src_c/stem_UTF_8_dutch.c
52
+ libstemmer_c/src_c/stem_UTF_8_dutch.h
53
+ libstemmer_c/src_c/stem_UTF_8_english.c
54
+ libstemmer_c/src_c/stem_UTF_8_english.h
55
+ libstemmer_c/src_c/stem_UTF_8_finnish.c
56
+ libstemmer_c/src_c/stem_UTF_8_finnish.h
57
+ libstemmer_c/src_c/stem_UTF_8_french.c
58
+ libstemmer_c/src_c/stem_UTF_8_french.h
59
+ libstemmer_c/src_c/stem_UTF_8_german.c
60
+ libstemmer_c/src_c/stem_UTF_8_german.h
61
+ libstemmer_c/src_c/stem_UTF_8_hungarian.c
62
+ libstemmer_c/src_c/stem_UTF_8_hungarian.h
63
+ libstemmer_c/src_c/stem_UTF_8_italian.c
64
+ libstemmer_c/src_c/stem_UTF_8_italian.h
65
+ libstemmer_c/src_c/stem_UTF_8_norwegian.c
66
+ libstemmer_c/src_c/stem_UTF_8_norwegian.h
67
+ libstemmer_c/src_c/stem_UTF_8_porter.c
68
+ libstemmer_c/src_c/stem_UTF_8_porter.h
69
+ libstemmer_c/src_c/stem_UTF_8_portuguese.c
70
+ libstemmer_c/src_c/stem_UTF_8_portuguese.h
71
+ libstemmer_c/src_c/stem_UTF_8_romanian.c
72
+ libstemmer_c/src_c/stem_UTF_8_romanian.h
73
+ libstemmer_c/src_c/stem_UTF_8_russian.c
74
+ libstemmer_c/src_c/stem_UTF_8_russian.h
75
+ libstemmer_c/src_c/stem_UTF_8_spanish.c
76
+ libstemmer_c/src_c/stem_UTF_8_spanish.h
77
+ libstemmer_c/src_c/stem_UTF_8_swedish.c
78
+ libstemmer_c/src_c/stem_UTF_8_swedish.h
79
+ libstemmer_c/src_c/stem_UTF_8_turkish.c
80
+ libstemmer_c/src_c/stem_UTF_8_turkish.h
81
+ MIT-LICENSE
82
+ Rakefile
83
+ README
84
+ ruby-stemmer.c
85
+ test.rb
86
+ Manifest
data/Rakefile CHANGED
@@ -4,7 +4,19 @@
4
4
 
5
5
  require 'rubygems'
6
6
  require 'rake'
7
- require 'rake/gempackagetask'
7
+ require 'echoe'
8
+
9
+ Echoe.new('ruby-stemmer', '0.5.3') do |p|
10
+ p.description = "Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1"
11
+ p.url = "http://github.com/yury/ruby-stemmer"
12
+ p.author = "Yury Korolev"
13
+ p.email = "yury.korolev@gmail.com"
14
+ p.extensions = ["extconf.rb"]
15
+ p.ignore_pattern = ["*.o", "**/*.o", "stemwords", "*.bundle", "*.a", "*.so"]
16
+ p.development_dependencies = []
17
+ p.runtime_dependencies = []
18
+ p.has_rdoc = true
19
+ end
8
20
 
9
21
  rm_rf 'Makefile'
10
22
 
@@ -23,35 +35,6 @@ PKG_FILES.exclude('*.bundle')
23
35
  PKG_FILES.exclude('*.a')
24
36
  PKG_FILES.exclude('*.so')
25
37
 
26
- spec = Gem::Specification.new do | s |
27
- s.name = 'ruby-stemmer'
28
- s.version = '0.5.1'
29
- s.summary = "Stemmer implementation to ruby using SnowBall API from libstemmer_c."
30
- s.description = <<-EOF
31
- Stemmer implementation to ruby using libstemmer_c.
32
- EOF
33
-
34
- s.files = PKG_FILES.to_a
35
- s.extensions << "extconf.rb"
36
- s.has_rdoc = true
37
- s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
38
- '--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
39
- '--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
40
- s.author = "Aurelian Oancea"
41
- s.email = "aurelian@locknet.ro"
42
- s.homepage = "http://nrr.rubyforge.org"
43
- s.rubyforge_project = "nrr"
44
- end
45
-
46
- pt = Rake::GemPackageTask.new(spec) do |p|
47
- p.need_tar = true
48
- p.need_zip = true
49
- end
50
-
51
- task :default do
52
- puts "Ok"
53
- end
54
-
55
38
  task :clean do
56
39
  `rm -rf Makefile mkmf.log ruby-stemmer.o stemmer.bundle stemmer.so`
57
40
  `cd libstemmer_c && make clean && cd ../`
@@ -0,0 +1,209 @@
1
+ /* This is a simple program which uses libstemmer to provide a command
2
+ * line interface for stemming using any of the algorithms provided.
3
+ */
4
+
5
+ #include <stdio.h>
6
+ #include <stdlib.h> /* for malloc, free */
7
+ #include <string.h> /* for memmove */
8
+ #include <ctype.h> /* for isupper, tolower */
9
+
10
+ #include "libstemmer.h"
11
+
12
+ const char * progname;
13
+ static int pretty = 1;
14
+
15
+ static void
16
+ stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
17
+ {
18
+ #define INC 10
19
+ int lim = INC;
20
+ sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
21
+
22
+ while(1) {
23
+ int ch = getc(f_in);
24
+ if (ch == EOF) {
25
+ free(b); return;
26
+ }
27
+ {
28
+ int i = 0;
29
+ int inlen = 0;
30
+ while(1) {
31
+ if (ch == '\n' || ch == EOF) break;
32
+ if (i == lim) {
33
+ sb_symbol * newb;
34
+ newb = (sb_symbol *)
35
+ realloc(b, (lim + INC) * sizeof(sb_symbol));
36
+ if (newb == 0) goto error;
37
+ b = newb;
38
+ lim = lim + INC;
39
+ }
40
+ /* Update count of utf-8 characters. */
41
+ if (ch < 0x80 || ch > 0xBF) inlen += 1;
42
+ /* force lower case: */
43
+ if (isupper(ch)) ch = tolower(ch);
44
+
45
+ b[i] = ch;
46
+ i++;
47
+ ch = getc(f_in);
48
+ }
49
+
50
+ {
51
+ const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
52
+ if (stemmed == NULL)
53
+ {
54
+ fprintf(stderr, "Out of memory");
55
+ exit(1);
56
+ }
57
+ else
58
+ {
59
+ if (pretty == 1) {
60
+ fwrite(b, i, 1, f_out);
61
+ fputs(" -> ", f_out);
62
+ } else if (pretty == 2) {
63
+ fwrite(b, i, 1, f_out);
64
+ if (sb_stemmer_length(stemmer) > 0) {
65
+ int j;
66
+ if (inlen < 30) {
67
+ for (j = 30 - inlen; j > 0; j--)
68
+ fputs(" ", f_out);
69
+ } else {
70
+ fputs("\n", f_out);
71
+ for (j = 30; j > 0; j--)
72
+ fputs(" ", f_out);
73
+ }
74
+ }
75
+ }
76
+
77
+ fputs((char *)stemmed, f_out);
78
+ putc('\n', f_out);
79
+ }
80
+ }
81
+ }
82
+ }
83
+ error:
84
+ if (b != 0) free(b);
85
+ return;
86
+ }
87
+
88
+ /** Display the command line syntax, and then exit.
89
+ * @param n The value to exit with.
90
+ */
91
+ static void
92
+ usage(int n)
93
+ {
94
+ printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
95
+ "\n"
96
+ "The input file consists of a list of words to be stemmed, one per\n"
97
+ "line. Words should be in lower case, but (for English) A-Z letters\n"
98
+ "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
99
+ "used.\n"
100
+ "\n"
101
+ "If -c is given, the argument is the character encoding of the input\n"
102
+ "and output files. If it is omitted, the UTF-8 encoding is used.\n"
103
+ "\n"
104
+ "If -p is given the output file consists of each word of the input\n"
105
+ "file followed by \"->\" followed by its stemmed equivalent.\n"
106
+ "If -p2 is given the output file is a two column layout containing\n"
107
+ "the input words in the first column and the stemmed eqivalents in\n"
108
+ "the second column.\n"
109
+ "Otherwise, the output file consists of the stemmed words, one per\n"
110
+ "line.\n"
111
+ "\n"
112
+ "-h displays this help\n",
113
+ progname);
114
+ exit(n);
115
+ }
116
+
117
+ int
118
+ main(int argc, char * argv[])
119
+ {
120
+ char * in = 0;
121
+ char * out = 0;
122
+ FILE * f_in;
123
+ FILE * f_out;
124
+ struct sb_stemmer * stemmer;
125
+
126
+ char * language = "english";
127
+ char * charenc = NULL;
128
+
129
+ char * s;
130
+ int i = 1;
131
+ pretty = 0;
132
+
133
+ progname = argv[0];
134
+
135
+ while(i < argc) {
136
+ s = argv[i++];
137
+ if (s[0] == '-') {
138
+ if (strcmp(s, "-o") == 0) {
139
+ if (i >= argc) {
140
+ fprintf(stderr, "%s requires an argument\n", s);
141
+ exit(1);
142
+ }
143
+ out = argv[i++];
144
+ } else if (strcmp(s, "-i") == 0) {
145
+ if (i >= argc) {
146
+ fprintf(stderr, "%s requires an argument\n", s);
147
+ exit(1);
148
+ }
149
+ in = argv[i++];
150
+ } else if (strcmp(s, "-l") == 0) {
151
+ if (i >= argc) {
152
+ fprintf(stderr, "%s requires an argument\n", s);
153
+ exit(1);
154
+ }
155
+ language = argv[i++];
156
+ } else if (strcmp(s, "-c") == 0) {
157
+ if (i >= argc) {
158
+ fprintf(stderr, "%s requires an argument\n", s);
159
+ exit(1);
160
+ }
161
+ charenc = argv[i++];
162
+ } else if (strcmp(s, "-p2") == 0) {
163
+ pretty = 2;
164
+ } else if (strcmp(s, "-p") == 0) {
165
+ pretty = 1;
166
+ } else if (strcmp(s, "-h") == 0) {
167
+ usage(0);
168
+ } else {
169
+ fprintf(stderr, "option %s unknown\n", s);
170
+ usage(1);
171
+ }
172
+ } else {
173
+ fprintf(stderr, "unexpected parameter %s\n", s);
174
+ usage(1);
175
+ }
176
+ }
177
+
178
+ /* prepare the files */
179
+ f_in = (in == 0) ? stdin : fopen(in, "r");
180
+ if (f_in == 0) {
181
+ fprintf(stderr, "file %s not found\n", in);
182
+ exit(1);
183
+ }
184
+ f_out = (out == 0) ? stdout : fopen(out, "w");
185
+ if (f_out == 0) {
186
+ fprintf(stderr, "file %s cannot be opened\n", out);
187
+ exit(1);
188
+ }
189
+
190
+ /* do the stemming process: */
191
+ stemmer = sb_stemmer_new(language, charenc);
192
+ if (stemmer == 0) {
193
+ if (charenc == NULL) {
194
+ fprintf(stderr, "language `%s' not available for stemming\n", language);
195
+ exit(1);
196
+ } else {
197
+ fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
198
+ exit(1);
199
+ }
200
+ }
201
+ stem_file(stemmer, f_in, f_out);
202
+ sb_stemmer_delete(stemmer);
203
+
204
+ if (in != 0) (void) fclose(f_in);
205
+ if (out != 0) (void) fclose(f_out);
206
+
207
+ return 0;
208
+ }
209
+
@@ -54,15 +54,15 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
54
54
  renc = rb_str_new2("UTF_8");
55
55
  }
56
56
 
57
- stemmer = sb_stemmer_new( RSTRING(rlang)->ptr, RSTRING(renc)->ptr );
57
+ stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
58
58
  if (stemmer == 0) {
59
59
  // printf(">>[libstemmer]: got a null stemmer!\n");
60
60
  if (renc == 0 ) {
61
- rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING(rlang)->ptr);
61
+ rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING_PTR(rlang));
62
62
  exit(1);
63
63
  } else {
64
64
  rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
65
- RSTRING(rlang)->ptr, RSTRING(renc)->ptr);
65
+ RSTRING_PTR(rlang), RSTRING_PTR(renc));
66
66
  exit(1);
67
67
  }
68
68
  }
@@ -70,8 +70,8 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
70
70
  sb_data = ALLOC(struct sb_stemmer_data);
71
71
  DATA_PTR(self) = sb_data;
72
72
  sb_data->stemmer= stemmer;
73
- sb_data->lang = RSTRING(rlang)->ptr;
74
- sb_data->enc = RSTRING(renc)->ptr;
73
+ sb_data->lang = RSTRING_PTR(rlang);
74
+ sb_data->enc = RSTRING_PTR(renc);
75
75
 
76
76
  return self;
77
77
  }
@@ -91,8 +91,8 @@ rb_stemmer_stem(VALUE self, VALUE word) {
91
91
  struct sb_stemmer_data * sb_data;
92
92
  const sb_symbol * stemmed;
93
93
  GetStemmer(self, sb_data);
94
- stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING(word)->ptr, RSTRING(word)->len);
95
- // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed);
94
+ stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(word), RSTRING_LEN(word));
95
+ // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING_PTR(word), stemmed);
96
96
  return rb_str_new2((char *)stemmed);
97
97
  }
98
98
 
@@ -0,0 +1,32 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{ruby-stemmer}
5
+ s.version = "0.5.3"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Yury Korolev"]
9
+ s.date = %q{2009-02-10}
10
+ s.description = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
11
+ s.email = %q{yury.korolev@gmail.com}
12
+ s.extensions = ["extconf.rb"]
13
+ s.extra_rdoc_files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "README"]
14
+ s.files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "MIT-LICENSE", "Rakefile", "README", "ruby-stemmer.c", "test.rb", "Manifest", "ruby-stemmer.gemspec"]
15
+ s.has_rdoc = true
16
+ s.homepage = %q{http://github.com/yury/ruby-stemmer}
17
+ s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ruby-stemmer", "--main", "README"]
18
+ s.require_paths = ["lib"]
19
+ s.rubyforge_project = %q{ruby-stemmer}
20
+ s.rubygems_version = %q{1.3.1}
21
+ s.summary = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
22
+
23
+ if s.respond_to? :specification_version then
24
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
25
+ s.specification_version = 2
26
+
27
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
28
+ else
29
+ end
30
+ else
31
+ end
32
+ end
data/test.rb CHANGED
@@ -1,11 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
-
2
+ # coding:utf-8
3
3
  #
4
4
  # $Id: test.rb 21 2008-04-30 10:57:37Z aurelian $
5
5
  #
6
6
 
7
- $kcode = "utf-8"
8
-
9
7
  require "stemmer.so"
10
8
 
11
9
  #puts "installation".stem
metadata CHANGED
@@ -1,37 +1,110 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
- - Aurelian Oancea
7
+ - Yury Korolev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-10 00:00:00 +02:00
12
+ date: 2009-02-10 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description: Stemmer implementation to ruby using libstemmer_c.
17
- email: aurelian@locknet.ro
16
+ description: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
17
+ email: yury.korolev@gmail.com
18
18
  executables: []
19
19
 
20
20
  extensions:
21
21
  - extconf.rb
22
- extra_rdoc_files: []
23
-
24
- files:
22
+ extra_rdoc_files:
25
23
  - extconf.rb
26
- - ruby-stemmer.c
27
- - test.rb
28
- - MIT-LICENSE
29
- - Rakefile
24
+ - libstemmer_c/examples/stemwords.c
25
+ - libstemmer_c/include/libstemmer.h
26
+ - libstemmer_c/libstemmer/libstemmer.c
27
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
28
+ - libstemmer_c/libstemmer/modules.h
29
+ - libstemmer_c/libstemmer/modules.txt
30
+ - libstemmer_c/libstemmer/modules_utf8.h
31
+ - libstemmer_c/libstemmer/modules_utf8.txt
32
+ - libstemmer_c/Makefile
33
+ - libstemmer_c/MANIFEST
34
+ - libstemmer_c/mkinc.mak
35
+ - libstemmer_c/mkinc_utf8.mak
36
+ - libstemmer_c/README
37
+ - libstemmer_c/runtime/api.c
38
+ - libstemmer_c/runtime/api.h
39
+ - libstemmer_c/runtime/header.h
40
+ - libstemmer_c/runtime/utilities.c
41
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
42
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
43
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
44
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
45
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
46
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
47
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
48
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
49
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
50
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
51
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
52
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
53
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
54
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
55
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
56
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
57
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
58
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
59
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
60
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
61
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
62
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
63
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
64
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
65
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
66
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
67
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
68
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
69
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
70
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
71
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
72
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
73
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
74
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
75
+ - libstemmer_c/src_c/stem_UTF_8_english.c
76
+ - libstemmer_c/src_c/stem_UTF_8_english.h
77
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
78
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
79
+ - libstemmer_c/src_c/stem_UTF_8_french.c
80
+ - libstemmer_c/src_c/stem_UTF_8_french.h
81
+ - libstemmer_c/src_c/stem_UTF_8_german.c
82
+ - libstemmer_c/src_c/stem_UTF_8_german.h
83
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
84
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
85
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
86
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
87
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
88
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
89
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
90
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
91
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
92
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
93
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
94
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
95
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
96
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
97
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
98
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
99
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
100
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
101
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
102
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
30
103
  - README
31
- - libstemmer_c/examples
32
- - libstemmer_c/include
104
+ files:
105
+ - extconf.rb
106
+ - libstemmer_c/examples/stemwords.c
33
107
  - libstemmer_c/include/libstemmer.h
34
- - libstemmer_c/libstemmer
35
108
  - libstemmer_c/libstemmer/libstemmer.c
36
109
  - libstemmer_c/libstemmer/libstemmer_utf8.c
37
110
  - libstemmer_c/libstemmer/modules.h
@@ -43,12 +116,10 @@ files:
43
116
  - libstemmer_c/mkinc.mak
44
117
  - libstemmer_c/mkinc_utf8.mak
45
118
  - libstemmer_c/README
46
- - libstemmer_c/runtime
47
119
  - libstemmer_c/runtime/api.c
48
120
  - libstemmer_c/runtime/api.h
49
121
  - libstemmer_c/runtime/header.h
50
122
  - libstemmer_c/runtime/utilities.c
51
- - libstemmer_c/src_c
52
123
  - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
53
124
  - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
54
125
  - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
@@ -111,21 +182,21 @@ files:
111
182
  - libstemmer_c/src_c/stem_UTF_8_swedish.h
112
183
  - libstemmer_c/src_c/stem_UTF_8_turkish.c
113
184
  - libstemmer_c/src_c/stem_UTF_8_turkish.h
185
+ - MIT-LICENSE
186
+ - Rakefile
187
+ - README
188
+ - ruby-stemmer.c
189
+ - test.rb
190
+ - Manifest
191
+ - ruby-stemmer.gemspec
114
192
  has_rdoc: true
115
- homepage: http://nrr.rubyforge.org
193
+ homepage: http://github.com/yury/ruby-stemmer
116
194
  post_install_message:
117
195
  rdoc_options:
118
- - --title
119
- - Ruby-Stemmer
120
- - --exclude
121
- - libstemmer_c
122
- - --exclude
123
- - extconf.rb
124
- - --exclude
125
- - test.rb
196
+ - --line-numbers
126
197
  - --inline-source
127
- - ruby-stemmer.c
128
- - README
198
+ - --title
199
+ - Ruby-stemmer
129
200
  - --main
130
201
  - README
131
202
  require_paths:
@@ -140,14 +211,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
211
  requirements:
141
212
  - - ">="
142
213
  - !ruby/object:Gem::Version
143
- version: "0"
214
+ version: "1.2"
144
215
  version:
145
216
  requirements: []
146
217
 
147
- rubyforge_project: nrr
148
- rubygems_version: 1.0.1
218
+ rubyforge_project: ruby-stemmer
219
+ rubygems_version: 1.3.1
149
220
  signing_key:
150
221
  specification_version: 2
151
- summary: Stemmer implementation to ruby using SnowBall API from libstemmer_c.
222
+ summary: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
152
223
  test_files: []
153
224