stemmer4r 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. data/CVS/Entries +5 -0
  2. data/CVS/Repository +1 -0
  3. data/CVS/Root +1 -0
  4. data/LICENSE +20 -0
  5. data/README +9 -0
  6. data/ext/CVS/Entries +1 -0
  7. data/ext/CVS/Repository +1 -0
  8. data/ext/CVS/Root +1 -0
  9. data/ext/stemmer4r/CVS/Entries +4 -0
  10. data/ext/stemmer4r/CVS/Repository +1 -0
  11. data/ext/stemmer4r/CVS/Root +1 -0
  12. data/ext/stemmer4r/depend +14 -0
  13. data/ext/stemmer4r/extconf.rb +8 -0
  14. data/ext/stemmer4r/libstemmer_c/CVS/Entries +7 -0
  15. data/ext/stemmer4r/libstemmer_c/CVS/Repository +1 -0
  16. data/ext/stemmer4r/libstemmer_c/CVS/Root +1 -0
  17. data/ext/stemmer4r/libstemmer_c/MANIFEST +39 -0
  18. data/ext/stemmer4r/libstemmer_c/Makefile +5 -0
  19. data/ext/stemmer4r/libstemmer_c/include/CVS/Entries +2 -0
  20. data/ext/stemmer4r/libstemmer_c/include/CVS/Repository +1 -0
  21. data/ext/stemmer4r/libstemmer_c/include/CVS/Root +1 -0
  22. data/ext/stemmer4r/libstemmer_c/include/libstemmer.h +63 -0
  23. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Entries +3 -0
  24. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Repository +1 -0
  25. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Root +1 -0
  26. data/ext/stemmer4r/libstemmer_c/libstemmer/libstemmer.c +78 -0
  27. data/ext/stemmer4r/libstemmer_c/libstemmer/modules.h +96 -0
  28. data/ext/stemmer4r/libstemmer_c/mkinc.mak +42 -0
  29. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Entries +5 -0
  30. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Repository +1 -0
  31. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Root +1 -0
  32. data/ext/stemmer4r/libstemmer_c/runtime/api.c +69 -0
  33. data/ext/stemmer4r/libstemmer_c/runtime/api.h +27 -0
  34. data/ext/stemmer4r/libstemmer_c/runtime/header.h +56 -0
  35. data/ext/stemmer4r/libstemmer_c/runtime/utilities.c +403 -0
  36. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Entries +33 -0
  37. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Repository +1 -0
  38. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Root +1 -0
  39. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.c +330 -0
  40. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.h +16 -0
  41. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.c +635 -0
  42. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.h +16 -0
  43. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.c +1109 -0
  44. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.h +16 -0
  45. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.c +792 -0
  46. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.h +16 -0
  47. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.c +1276 -0
  48. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.h +16 -0
  49. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.c +504 -0
  50. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.h +16 -0
  51. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.c +549 -0
  52. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.h +16 -0
  53. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.c +1087 -0
  54. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.h +16 -0
  55. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.c +1780 -0
  56. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.h +16 -0
  57. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.c +1752 -0
  58. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.h +16 -0
  59. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.c +279 -0
  60. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.h +16 -0
  61. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.c +776 -0
  62. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.h +16 -0
  63. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.c +1027 -0
  64. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.h +16 -0
  65. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.c +701 -0
  66. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.h +16 -0
  67. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.c +1109 -0
  68. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.h +16 -0
  69. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.c +299 -0
  70. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.h +16 -0
  71. data/ext/stemmer4r/stemmer4r.c +146 -0
  72. data/stemmer4r.gemspec +23 -0
  73. data/test/CVS/Entries +2 -0
  74. data/test/CVS/Repository +1 -0
  75. data/test/CVS/Root +1 -0
  76. data/test/test.rb +31 -0
  77. data/test/tests/CVS/Entries +12 -0
  78. data/test/tests/CVS/Repository +1 -0
  79. data/test/tests/CVS/Root +1 -0
  80. data/test/tests/da/CVS/Entries +3 -0
  81. data/test/tests/da/CVS/Repository +1 -0
  82. data/test/tests/da/CVS/Root +1 -0
  83. data/test/tests/da/output.txt +23829 -0
  84. data/test/tests/da/voc.txt +23829 -0
  85. data/test/tests/de/CVS/Entries +3 -0
  86. data/test/tests/de/CVS/Repository +1 -0
  87. data/test/tests/de/CVS/Root +1 -0
  88. data/test/tests/de/output.txt +35033 -0
  89. data/test/tests/de/voc.txt +35033 -0
  90. data/test/tests/en/CVS/Entries +3 -0
  91. data/test/tests/en/CVS/Repository +1 -0
  92. data/test/tests/en/CVS/Root +1 -0
  93. data/test/tests/en/output.txt +29400 -0
  94. data/test/tests/en/voc.txt +29400 -0
  95. data/test/tests/es/CVS/Entries +3 -0
  96. data/test/tests/es/CVS/Repository +1 -0
  97. data/test/tests/es/CVS/Root +1 -0
  98. data/test/tests/es/output.txt +28390 -0
  99. data/test/tests/es/voc.txt +28390 -0
  100. data/test/tests/fi/CVS/Entries +3 -0
  101. data/test/tests/fi/CVS/Repository +1 -0
  102. data/test/tests/fi/CVS/Root +1 -0
  103. data/test/tests/fi/output.txt +50000 -0
  104. data/test/tests/fi/voc.txt +50000 -0
  105. data/test/tests/fr/CVS/Entries +3 -0
  106. data/test/tests/fr/CVS/Repository +1 -0
  107. data/test/tests/fr/CVS/Root +1 -0
  108. data/test/tests/fr/output.txt +20403 -0
  109. data/test/tests/fr/voc.txt +20403 -0
  110. data/test/tests/it/CVS/Entries +3 -0
  111. data/test/tests/it/CVS/Repository +1 -0
  112. data/test/tests/it/CVS/Root +1 -0
  113. data/test/tests/it/output.txt +35494 -0
  114. data/test/tests/it/voc.txt +35494 -0
  115. data/test/tests/nl/CVS/Entries +3 -0
  116. data/test/tests/nl/CVS/Repository +1 -0
  117. data/test/tests/nl/CVS/Root +1 -0
  118. data/test/tests/nl/output.txt +45669 -0
  119. data/test/tests/nl/voc.txt +45669 -0
  120. data/test/tests/no/CVS/Entries +3 -0
  121. data/test/tests/no/CVS/Repository +1 -0
  122. data/test/tests/no/CVS/Root +1 -0
  123. data/test/tests/no/output.txt +20628 -0
  124. data/test/tests/no/voc.txt +20628 -0
  125. data/test/tests/pt/CVS/Entries +3 -0
  126. data/test/tests/pt/CVS/Repository +1 -0
  127. data/test/tests/pt/CVS/Root +1 -0
  128. data/test/tests/pt/output.txt +32016 -0
  129. data/test/tests/pt/voc.txt +32016 -0
  130. data/test/tests/ru/CVS/Entries +3 -0
  131. data/test/tests/ru/CVS/Repository +1 -0
  132. data/test/tests/ru/CVS/Root +1 -0
  133. data/test/tests/ru/output.txt +49673 -0
  134. data/test/tests/ru/voc.txt +49673 -0
  135. data/test/tests/sv/CVS/Entries +3 -0
  136. data/test/tests/sv/CVS/Repository +1 -0
  137. data/test/tests/sv/CVS/Root +1 -0
  138. data/test/tests/sv/output.txt +30623 -0
  139. data/test/tests/sv/voc.txt +30623 -0
  140. metadata +221 -0
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * spanish_create_env(void);
9
+ extern void spanish_close_env(struct SN_env * z);
10
+
11
+ extern int spanish_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,299 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #include "../runtime/header.h"
5
+
6
+ extern int swedish_stem(struct SN_env * z);
7
+ static int r_other_suffix(struct SN_env * z);
8
+ static int r_consonant_pair(struct SN_env * z);
9
+ static int r_main_suffix(struct SN_env * z);
10
+ static int r_mark_regions(struct SN_env * z);
11
+
12
+ extern struct SN_env * swedish_create_env(void);
13
+ extern void swedish_close_env(struct SN_env * z);
14
+
15
+ static symbol s_0_0[1] = { 'a' };
16
+ static symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
17
+ static symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
18
+ static symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
19
+ static symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
20
+ static symbol s_0_5[2] = { 'a', 'd' };
21
+ static symbol s_0_6[1] = { 'e' };
22
+ static symbol s_0_7[3] = { 'a', 'd', 'e' };
23
+ static symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
24
+ static symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
25
+ static symbol s_0_10[3] = { 'a', 'r', 'e' };
26
+ static symbol s_0_11[4] = { 'a', 's', 't', 'e' };
27
+ static symbol s_0_12[2] = { 'e', 'n' };
28
+ static symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
29
+ static symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
30
+ static symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
31
+ static symbol s_0_16[3] = { 'e', 'r', 'n' };
32
+ static symbol s_0_17[2] = { 'a', 'r' };
33
+ static symbol s_0_18[2] = { 'e', 'r' };
34
+ static symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
35
+ static symbol s_0_20[2] = { 'o', 'r' };
36
+ static symbol s_0_21[1] = { 's' };
37
+ static symbol s_0_22[2] = { 'a', 's' };
38
+ static symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
39
+ static symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
40
+ static symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
41
+ static symbol s_0_26[2] = { 'e', 's' };
42
+ static symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
43
+ static symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
44
+ static symbol s_0_29[3] = { 'e', 'n', 's' };
45
+ static symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
46
+ static symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
47
+ static symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
48
+ static symbol s_0_33[2] = { 'a', 't' };
49
+ static symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
50
+ static symbol s_0_35[3] = { 'h', 'e', 't' };
51
+ static symbol s_0_36[3] = { 'a', 's', 't' };
52
+
53
+ static struct among a_0[37] =
54
+ {
55
+ /* 0 */ { 1, s_0_0, -1, 1, 0},
56
+ /* 1 */ { 4, s_0_1, 0, 1, 0},
57
+ /* 2 */ { 4, s_0_2, 0, 1, 0},
58
+ /* 3 */ { 7, s_0_3, 2, 1, 0},
59
+ /* 4 */ { 4, s_0_4, 0, 1, 0},
60
+ /* 5 */ { 2, s_0_5, -1, 1, 0},
61
+ /* 6 */ { 1, s_0_6, -1, 1, 0},
62
+ /* 7 */ { 3, s_0_7, 6, 1, 0},
63
+ /* 8 */ { 4, s_0_8, 6, 1, 0},
64
+ /* 9 */ { 4, s_0_9, 6, 1, 0},
65
+ /* 10 */ { 3, s_0_10, 6, 1, 0},
66
+ /* 11 */ { 4, s_0_11, 6, 1, 0},
67
+ /* 12 */ { 2, s_0_12, -1, 1, 0},
68
+ /* 13 */ { 5, s_0_13, 12, 1, 0},
69
+ /* 14 */ { 4, s_0_14, 12, 1, 0},
70
+ /* 15 */ { 5, s_0_15, 12, 1, 0},
71
+ /* 16 */ { 3, s_0_16, -1, 1, 0},
72
+ /* 17 */ { 2, s_0_17, -1, 1, 0},
73
+ /* 18 */ { 2, s_0_18, -1, 1, 0},
74
+ /* 19 */ { 5, s_0_19, 18, 1, 0},
75
+ /* 20 */ { 2, s_0_20, -1, 1, 0},
76
+ /* 21 */ { 1, s_0_21, -1, 2, 0},
77
+ /* 22 */ { 2, s_0_22, 21, 1, 0},
78
+ /* 23 */ { 5, s_0_23, 22, 1, 0},
79
+ /* 24 */ { 5, s_0_24, 22, 1, 0},
80
+ /* 25 */ { 5, s_0_25, 22, 1, 0},
81
+ /* 26 */ { 2, s_0_26, 21, 1, 0},
82
+ /* 27 */ { 4, s_0_27, 26, 1, 0},
83
+ /* 28 */ { 5, s_0_28, 26, 1, 0},
84
+ /* 29 */ { 3, s_0_29, 21, 1, 0},
85
+ /* 30 */ { 5, s_0_30, 29, 1, 0},
86
+ /* 31 */ { 6, s_0_31, 29, 1, 0},
87
+ /* 32 */ { 4, s_0_32, 21, 1, 0},
88
+ /* 33 */ { 2, s_0_33, -1, 1, 0},
89
+ /* 34 */ { 5, s_0_34, -1, 1, 0},
90
+ /* 35 */ { 3, s_0_35, -1, 1, 0},
91
+ /* 36 */ { 3, s_0_36, -1, 1, 0}
92
+ };
93
+
94
+ static symbol s_1_0[2] = { 'd', 'd' };
95
+ static symbol s_1_1[2] = { 'g', 'd' };
96
+ static symbol s_1_2[2] = { 'n', 'n' };
97
+ static symbol s_1_3[2] = { 'd', 't' };
98
+ static symbol s_1_4[2] = { 'g', 't' };
99
+ static symbol s_1_5[2] = { 'k', 't' };
100
+ static symbol s_1_6[2] = { 't', 't' };
101
+
102
+ static struct among a_1[7] =
103
+ {
104
+ /* 0 */ { 2, s_1_0, -1, -1, 0},
105
+ /* 1 */ { 2, s_1_1, -1, -1, 0},
106
+ /* 2 */ { 2, s_1_2, -1, -1, 0},
107
+ /* 3 */ { 2, s_1_3, -1, -1, 0},
108
+ /* 4 */ { 2, s_1_4, -1, -1, 0},
109
+ /* 5 */ { 2, s_1_5, -1, -1, 0},
110
+ /* 6 */ { 2, s_1_6, -1, -1, 0}
111
+ };
112
+
113
+ static symbol s_2_0[2] = { 'i', 'g' };
114
+ static symbol s_2_1[3] = { 'l', 'i', 'g' };
115
+ static symbol s_2_2[3] = { 'e', 'l', 's' };
116
+ static symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
117
+ static symbol s_2_4[4] = { 'l', 246, 's', 't' };
118
+
119
+ static struct among a_2[5] =
120
+ {
121
+ /* 0 */ { 2, s_2_0, -1, 1, 0},
122
+ /* 1 */ { 3, s_2_1, 0, 1, 0},
123
+ /* 2 */ { 3, s_2_2, -1, 1, 0},
124
+ /* 3 */ { 5, s_2_3, -1, 3, 0},
125
+ /* 4 */ { 4, s_2_4, -1, 2, 0}
126
+ };
127
+
128
+ static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
129
+
130
+ static unsigned char g_s_ending[] = { 119, 127, 149 };
131
+
132
+ static symbol s_0[] = { 'l', 246, 's' };
133
+ static symbol s_1[] = { 'f', 'u', 'l', 'l' };
134
+
135
+ static int r_mark_regions(struct SN_env * z) {
136
+ z->I[0] = z->l;
137
+ while(1) { /* goto, line 30 */
138
+ int c = z->c;
139
+ if (!(in_grouping(z, g_v, 97, 246))) goto lab0;
140
+ z->c = c;
141
+ break;
142
+ lab0:
143
+ z->c = c;
144
+ if (z->c >= z->l) return 0;
145
+ z->c++;
146
+ }
147
+ while(1) { /* gopast, line 30 */
148
+ if (!(out_grouping(z, g_v, 97, 246))) goto lab1;
149
+ break;
150
+ lab1:
151
+ if (z->c >= z->l) return 0;
152
+ z->c++;
153
+ }
154
+ z->I[0] = z->c; /* setmark p1, line 30 */
155
+ /* try, line 31 */
156
+ if (!(z->I[0] < 3)) goto lab2;
157
+ z->I[0] = 3;
158
+ lab2:
159
+ return 1;
160
+ }
161
+
162
+ static int r_main_suffix(struct SN_env * z) {
163
+ int among_var;
164
+ { int m3; /* setlimit, line 37 */
165
+ int m = z->l - z->c; (void) m;
166
+ if (z->c < z->I[0]) return 0;
167
+ z->c = z->I[0]; /* tomark, line 37 */
168
+ m3 = z->lb; z->lb = z->c;
169
+ z->c = z->l - m;
170
+ z->ket = z->c; /* [, line 37 */
171
+ among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
172
+ if (!(among_var)) { z->lb = m3; return 0; }
173
+ z->bra = z->c; /* ], line 37 */
174
+ z->lb = m3;
175
+ }
176
+ switch(among_var) {
177
+ case 0: return 0;
178
+ case 1:
179
+ { int ret;
180
+ ret = slice_del(z); /* delete, line 44 */
181
+ if (ret < 0) return ret;
182
+ }
183
+ break;
184
+ case 2:
185
+ if (!(in_grouping_b(z, g_s_ending, 98, 121))) return 0;
186
+ { int ret;
187
+ ret = slice_del(z); /* delete, line 46 */
188
+ if (ret < 0) return ret;
189
+ }
190
+ break;
191
+ }
192
+ return 1;
193
+ }
194
+
195
+ static int r_consonant_pair(struct SN_env * z) {
196
+ { int m3; /* setlimit, line 50 */
197
+ int m = z->l - z->c; (void) m;
198
+ if (z->c < z->I[0]) return 0;
199
+ z->c = z->I[0]; /* tomark, line 50 */
200
+ m3 = z->lb; z->lb = z->c;
201
+ z->c = z->l - m;
202
+ { int m = z->l - z->c; (void) m; /* and, line 52 */
203
+ if (!(find_among_b(z, a_1, 7))) { z->lb = m3; return 0; } /* among, line 51 */
204
+ z->c = z->l - m;
205
+ z->ket = z->c; /* [, line 52 */
206
+ if (z->c <= z->lb) { z->lb = m3; return 0; }
207
+ z->c--; /* next, line 52 */
208
+ z->bra = z->c; /* ], line 52 */
209
+ { int ret;
210
+ ret = slice_del(z); /* delete, line 52 */
211
+ if (ret < 0) return ret;
212
+ }
213
+ }
214
+ z->lb = m3;
215
+ }
216
+ return 1;
217
+ }
218
+
219
+ static int r_other_suffix(struct SN_env * z) {
220
+ int among_var;
221
+ { int m3; /* setlimit, line 55 */
222
+ int m = z->l - z->c; (void) m;
223
+ if (z->c < z->I[0]) return 0;
224
+ z->c = z->I[0]; /* tomark, line 55 */
225
+ m3 = z->lb; z->lb = z->c;
226
+ z->c = z->l - m;
227
+ z->ket = z->c; /* [, line 56 */
228
+ among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
229
+ if (!(among_var)) { z->lb = m3; return 0; }
230
+ z->bra = z->c; /* ], line 56 */
231
+ switch(among_var) {
232
+ case 0: { z->lb = m3; return 0; }
233
+ case 1:
234
+ { int ret;
235
+ ret = slice_del(z); /* delete, line 57 */
236
+ if (ret < 0) return ret;
237
+ }
238
+ break;
239
+ case 2:
240
+ { int ret;
241
+ ret = slice_from_s(z, 3, s_0); /* <-, line 58 */
242
+ if (ret < 0) return ret;
243
+ }
244
+ break;
245
+ case 3:
246
+ { int ret;
247
+ ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
248
+ if (ret < 0) return ret;
249
+ }
250
+ break;
251
+ }
252
+ z->lb = m3;
253
+ }
254
+ return 1;
255
+ }
256
+
257
+ extern int swedish_stem(struct SN_env * z) {
258
+ { int c = z->c; /* do, line 66 */
259
+ { int ret = r_mark_regions(z);
260
+ if (ret == 0) goto lab0; /* call mark_regions, line 66 */
261
+ if (ret < 0) return ret;
262
+ }
263
+ lab0:
264
+ z->c = c;
265
+ }
266
+ z->lb = z->c; z->c = z->l; /* backwards, line 67 */
267
+
268
+ { int m = z->l - z->c; (void) m; /* do, line 68 */
269
+ { int ret = r_main_suffix(z);
270
+ if (ret == 0) goto lab1; /* call main_suffix, line 68 */
271
+ if (ret < 0) return ret;
272
+ }
273
+ lab1:
274
+ z->c = z->l - m;
275
+ }
276
+ { int m = z->l - z->c; (void) m; /* do, line 69 */
277
+ { int ret = r_consonant_pair(z);
278
+ if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
279
+ if (ret < 0) return ret;
280
+ }
281
+ lab2:
282
+ z->c = z->l - m;
283
+ }
284
+ { int m = z->l - z->c; (void) m; /* do, line 70 */
285
+ { int ret = r_other_suffix(z);
286
+ if (ret == 0) goto lab3; /* call other_suffix, line 70 */
287
+ if (ret < 0) return ret;
288
+ }
289
+ lab3:
290
+ z->c = z->l - m;
291
+ }
292
+ z->c = z->lb;
293
+ return 1;
294
+ }
295
+
296
/*
 * Creates the environment used by swedish_stem().
 * NOTE(review): the arguments (0, 1, 0) presumably request one integer
 * slot, matching the single z->I[0] used by this stemmer -- confirm
 * against the SN_create_env() declaration.
 */
extern struct SN_env * swedish_create_env(void)
{
    return SN_create_env(0, 1, 0);
}

/* Releases an environment obtained from swedish_create_env(). */
extern void swedish_close_env(struct SN_env * z)
{
    SN_close_env(z);
}
299
+
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * swedish_create_env(void);
9
+ extern void swedish_close_env(struct SN_env * z);
10
+
11
+ extern int swedish_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,146 @@
1
+ /*
2
+ * stemmer4r.c - An interface for the libstemmer library.
3
+ * http://snowball.tartarus.org/
4
+ *
5
+ * Copyright (c) Fabien POTENCIER 2005
6
+ *
7
+ */
8
+
9
+ #include "ruby.h"
10
+ #include "libstemmer.h"
11
+
12
+ /*
13
+ * Document-class: Stemmer
14
+ *
15
+ * == Summary
16
+ *
17
+ * Ruby extension for stemming.
18
+ *
19
+ * == Abstract
20
+ *
21
+ * Stemmer is a wrapper class for the <tt>libstemmer</tt> library,
22
+ * the snowball stemming algorithms.
23
+ *
24
+ */
25
+
26
+ static VALUE rb_cStemmer;
27
+
28
/*
 * Definition of the stemmer handle wrapped by Stemmer objects.
 * Nothing in this file reads or writes these members; handles are only
 * passed to the sb_stemmer_* functions.
 * NOTE(review): this presumably mirrors the layout libstemmer uses
 * internally -- confirm against libstemmer.c before changing it.
 */
struct sb_stemmer {
    struct SN_env * (*create)(void);    /* environment constructor */
    void (*close)(struct SN_env *);     /* environment destructor */
    int (*stem)(struct SN_env *);       /* stemming routine */

    struct SN_env * env;                /* environment owned by this handle */
};
35
+
36
/*
 * GC mark callback registered with Data_Wrap_Struct().
 * Intentionally empty: struct sb_stemmer holds no Ruby VALUEs.
 */
static void
stemmer_mark(struct sb_stemmer* stemmer)
{
  (void) stemmer;
}

/*
 * GC free callback registered with Data_Wrap_Struct(): hands the wrapped
 * stemmer back to libstemmer.
 */
static void
stemmer_free(struct sb_stemmer* stemmer)
{
  sb_stemmer_delete(stemmer);
}
46
+
47
+ /*
48
+ * Document-method: new
49
+ * call-seq: Stemmer.new(language)
50
+ *
51
+ * Creates new code stemmer for +language+.
52
+ *
53
+ * === Parameters
54
+ *
55
+ * +language+:: stemmer language (language ISO code are supported)
56
+ *
57
+ * Currently available languages:
58
+ *
59
+ * * english (en)
60
+ * * french (fr)
61
+ * * spanish (es)
62
+ * * potuguese (pt)
63
+ * * italian (it)
64
+ * * german (de)
65
+ * * dutch (nl)
66
+ * * swedish (se)
67
+ * * danish (da)
68
+ * * russian (ru)
69
+ * * finnish (fi)
70
+ *
71
+ * === Exceptions
72
+ *
73
+ * ArgumentError:: if we can't find an available stemmer for +language+
74
+ *
75
+ * === Examples
76
+ *
77
+ * stemmer = Stemmer.new('en')
78
+ * puts stemmer.stem('testing')
79
+ *
80
+ * stemmer = Stemmer.new('fr')
81
+ * puts stemmer.stem('�t�')
82
+ *
83
+ */
84
+ static VALUE
85
+ stemmer_allocate(VALUE klass, VALUE algorithm)
86
+ {
87
+ struct sb_stemmer* stemmer;
88
+ char * calgorithm;
89
+
90
+ Check_Type(algorithm, T_STRING);
91
+ calgorithm = strdup(STR2CSTR(algorithm));
92
+ if (!strcmp(calgorithm, "es"))
93
+ {
94
+ strcpy(calgorithm, "spanish");
95
+ }
96
+
97
+ stemmer = sb_stemmer_new(calgorithm);
98
+ free(calgorithm);
99
+
100
+ if (stemmer == NULL)
101
+ {
102
+ rb_raise(rb_eArgError, "Algorithm '%s' doesn't exist", STR2CSTR(algorithm));
103
+ }
104
+
105
+ return Data_Wrap_Struct(klass, stemmer_mark, stemmer_free, stemmer);
106
+ }
107
+
108
+ /*
109
+ * Document-method: stem
110
+ * call-seq: stem(str)
111
+ *
112
+ * Stems string and returns the result.
113
+ *
114
+ * === Parameters
115
+ *
116
+ * +str+:: string to stem. String must be encoding the 'right' way (iso-8859-1 for french for example).
117
+ */
118
+ static VALUE
119
+ stemmer_stem(VALUE self, VALUE word)
120
+ {
121
+ VALUE ret;
122
+ sb_symbol *cword;
123
+ const sb_symbol *rcword;
124
+ struct sb_stemmer *stemmer;
125
+
126
+ Check_Type(word, T_STRING);
127
+
128
+ cword = strdup(STR2CSTR(word));
129
+ Data_Get_Struct(self, struct sb_stemmer, stemmer);
130
+ rcword = sb_stemmer_stem(stemmer, cword, RSTRING(word)->len);
131
+
132
+ ret = rb_str_new2(rcword);
133
+ free(cword);
134
+
135
+ return ret;
136
+ }
137
+
138
+ void
139
+ Init_stemmer4r()
140
+ {
141
+ rb_cStemmer = rb_define_class("Stemmer", rb_cObject);
142
+ rb_define_singleton_method(rb_cStemmer, "new", stemmer_allocate, 1);
143
+ rb_define_method(rb_cStemmer, "stem", stemmer_stem, 1);
144
+ rb_define_method(rb_cStemmer, "list", stemmer_list, 0);
145
+ }
146
+
data/stemmer4r.gemspec ADDED
@@ -0,0 +1,23 @@
1
require 'rubygems'

# Gem specification for stemmer4r, a Ruby extension wrapping libstemmer.
#
# Every file below the current directory is packaged except version-control
# metadata. The project is kept in CVS, so CVS/ directories are filtered out
# in addition to the .svn paths that were already excluded.
spec = Gem::Specification.new do |s|
  s.name = 'stemmer4r'
  s.version = '0.1'
  s.author = "Fabien POTENCIER"
  s.email = "fabien.potencier@gmail.com"
  s.homepage = "http://stemmer4r.rubyforge.org"
  s.summary = <<-EOF
    Stemmer4r is a Ruby extension that wraps the snowball stemmer library (libstemmer).
  EOF
  s.description = <<-EOF
    Stemmer4r is a Ruby extension that wraps the snowball stemmer library (libstemmer).
  EOF
  s.files = Dir.glob("**/*").reject do |item|
    item.include?(".svn") || item.split("/").include?("CVS")
  end
  s.extensions << "ext/stemmer4r/extconf.rb"
  s.require_path = '.'
  s.extra_rdoc_files = ["README"]
  s.test_files = Dir.glob('test/*.rb')

  # These attributes were deprecated by later RubyGems releases; set them
  # only when the running RubyGems still provides the writer, so the
  # specification loads on old and new versions alike.
  {
    'rubyforge_project=' => "stemmer4r",
    'autorequire=' => 'stemmer4r',
    'has_rdoc=' => true
  }.each do |writer, value|
    s.send(writer, value) if s.respond_to?(writer)
  end
end
data/test/CVS/Entries ADDED
@@ -0,0 +1,2 @@
1
+ /test.rb/1.1.1.1/Wed May 11 07:39:43 2005//
2
+ D/tests////
@@ -0,0 +1 @@
1
+ stemmer4r/test
data/test/CVS/Root ADDED
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
data/test/test.rb ADDED
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ begin
4
+ require 'rubygems'
5
+ require_gem 'stemmer4r'
6
+ rescue LoadError
7
+ require 'stemmer4r'
8
+ end
9
+
10
+ require 'test/unit'
11
+
12
# Tests for the Stemmer extension class.
class StemmerTest < Test::Unit::TestCase
  # Languages that have a voc.txt / output.txt fixture pair under tests/.
  LANGUAGES = %w{da de nl en es fi fr it no pt ru sv}

  # A known language code yields a stemmer; an unknown one raises ArgumentError.
  def test_valid_language
    assert_not_nil(Stemmer.new('fr'))
    assert_nothing_raised() { Stemmer.new('fr') }
    assert_raise(ArgumentError) { Stemmer.new('notavalidlanguage') }
  end

  # Stems every word of tests/<language>/voc.txt and compares the result
  # with the line at the same position in tests/<language>/output.txt.
  def test_stemmer
    dir = File.dirname(__FILE__)
    LANGUAGES.each do |language|
      puts "Testing '#{language}' algorithm..."
      stemmer = Stemmer.new(language)
      # Block form so both files are closed even when an assertion fails.
      File.open("#{dir}/tests/#{language}/voc.txt") do |input|
        File.open("#{dir}/tests/#{language}/output.txt") do |output|
          input.each_line do |word|
            expected = output.gets
            assert_not_nil(expected, "output.txt for '#{language}' is shorter than voc.txt")
            # chomp rather than chomp!: chomp! returns nil when the line has
            # no trailing newline, which is the case for an unterminated
            # last line.
            assert_equal(expected.chomp, stemmer.stem(word.chomp))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ D/da////
2
+ D/de////
3
+ D/en////
4
+ D/es////
5
+ D/fi////
6
+ D/fr////
7
+ D/it////
8
+ D/nl////
9
+ D/no////
10
+ D/pt////
11
+ D/ru////
12
+ D/sv////
@@ -0,0 +1 @@
1
+ stemmer4r/test/tests
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,3 @@
1
+ /output.txt/1.1.1.1/Wed May 11 07:41:22 2005//
2
+ /voc.txt/1.1.1.1/Wed May 11 07:41:28 2005//
3
+ D
@@ -0,0 +1 @@
1
+ stemmer4r/test/tests/da
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r