stemmer4r 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. data/CVS/Entries +5 -0
  2. data/CVS/Repository +1 -0
  3. data/CVS/Root +1 -0
  4. data/LICENSE +20 -0
  5. data/README +9 -0
  6. data/ext/CVS/Entries +1 -0
  7. data/ext/CVS/Repository +1 -0
  8. data/ext/CVS/Root +1 -0
  9. data/ext/stemmer4r/CVS/Entries +4 -0
  10. data/ext/stemmer4r/CVS/Repository +1 -0
  11. data/ext/stemmer4r/CVS/Root +1 -0
  12. data/ext/stemmer4r/depend +14 -0
  13. data/ext/stemmer4r/extconf.rb +8 -0
  14. data/ext/stemmer4r/libstemmer_c/CVS/Entries +7 -0
  15. data/ext/stemmer4r/libstemmer_c/CVS/Repository +1 -0
  16. data/ext/stemmer4r/libstemmer_c/CVS/Root +1 -0
  17. data/ext/stemmer4r/libstemmer_c/MANIFEST +39 -0
  18. data/ext/stemmer4r/libstemmer_c/Makefile +5 -0
  19. data/ext/stemmer4r/libstemmer_c/include/CVS/Entries +2 -0
  20. data/ext/stemmer4r/libstemmer_c/include/CVS/Repository +1 -0
  21. data/ext/stemmer4r/libstemmer_c/include/CVS/Root +1 -0
  22. data/ext/stemmer4r/libstemmer_c/include/libstemmer.h +63 -0
  23. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Entries +3 -0
  24. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Repository +1 -0
  25. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Root +1 -0
  26. data/ext/stemmer4r/libstemmer_c/libstemmer/libstemmer.c +78 -0
  27. data/ext/stemmer4r/libstemmer_c/libstemmer/modules.h +96 -0
  28. data/ext/stemmer4r/libstemmer_c/mkinc.mak +42 -0
  29. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Entries +5 -0
  30. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Repository +1 -0
  31. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Root +1 -0
  32. data/ext/stemmer4r/libstemmer_c/runtime/api.c +69 -0
  33. data/ext/stemmer4r/libstemmer_c/runtime/api.h +27 -0
  34. data/ext/stemmer4r/libstemmer_c/runtime/header.h +56 -0
  35. data/ext/stemmer4r/libstemmer_c/runtime/utilities.c +403 -0
  36. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Entries +33 -0
  37. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Repository +1 -0
  38. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Root +1 -0
  39. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.c +330 -0
  40. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.h +16 -0
  41. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.c +635 -0
  42. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.h +16 -0
  43. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.c +1109 -0
  44. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.h +16 -0
  45. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.c +792 -0
  46. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.h +16 -0
  47. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.c +1276 -0
  48. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.h +16 -0
  49. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.c +504 -0
  50. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.h +16 -0
  51. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.c +549 -0
  52. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.h +16 -0
  53. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.c +1087 -0
  54. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.h +16 -0
  55. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.c +1780 -0
  56. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.h +16 -0
  57. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.c +1752 -0
  58. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.h +16 -0
  59. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.c +279 -0
  60. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.h +16 -0
  61. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.c +776 -0
  62. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.h +16 -0
  63. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.c +1027 -0
  64. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.h +16 -0
  65. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.c +701 -0
  66. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.h +16 -0
  67. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.c +1109 -0
  68. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.h +16 -0
  69. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.c +299 -0
  70. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.h +16 -0
  71. data/ext/stemmer4r/stemmer4r.c +146 -0
  72. data/stemmer4r.gemspec +23 -0
  73. data/test/CVS/Entries +2 -0
  74. data/test/CVS/Repository +1 -0
  75. data/test/CVS/Root +1 -0
  76. data/test/test.rb +31 -0
  77. data/test/tests/CVS/Entries +12 -0
  78. data/test/tests/CVS/Repository +1 -0
  79. data/test/tests/CVS/Root +1 -0
  80. data/test/tests/da/CVS/Entries +3 -0
  81. data/test/tests/da/CVS/Repository +1 -0
  82. data/test/tests/da/CVS/Root +1 -0
  83. data/test/tests/da/output.txt +23829 -0
  84. data/test/tests/da/voc.txt +23829 -0
  85. data/test/tests/de/CVS/Entries +3 -0
  86. data/test/tests/de/CVS/Repository +1 -0
  87. data/test/tests/de/CVS/Root +1 -0
  88. data/test/tests/de/output.txt +35033 -0
  89. data/test/tests/de/voc.txt +35033 -0
  90. data/test/tests/en/CVS/Entries +3 -0
  91. data/test/tests/en/CVS/Repository +1 -0
  92. data/test/tests/en/CVS/Root +1 -0
  93. data/test/tests/en/output.txt +29400 -0
  94. data/test/tests/en/voc.txt +29400 -0
  95. data/test/tests/es/CVS/Entries +3 -0
  96. data/test/tests/es/CVS/Repository +1 -0
  97. data/test/tests/es/CVS/Root +1 -0
  98. data/test/tests/es/output.txt +28390 -0
  99. data/test/tests/es/voc.txt +28390 -0
  100. data/test/tests/fi/CVS/Entries +3 -0
  101. data/test/tests/fi/CVS/Repository +1 -0
  102. data/test/tests/fi/CVS/Root +1 -0
  103. data/test/tests/fi/output.txt +50000 -0
  104. data/test/tests/fi/voc.txt +50000 -0
  105. data/test/tests/fr/CVS/Entries +3 -0
  106. data/test/tests/fr/CVS/Repository +1 -0
  107. data/test/tests/fr/CVS/Root +1 -0
  108. data/test/tests/fr/output.txt +20403 -0
  109. data/test/tests/fr/voc.txt +20403 -0
  110. data/test/tests/it/CVS/Entries +3 -0
  111. data/test/tests/it/CVS/Repository +1 -0
  112. data/test/tests/it/CVS/Root +1 -0
  113. data/test/tests/it/output.txt +35494 -0
  114. data/test/tests/it/voc.txt +35494 -0
  115. data/test/tests/nl/CVS/Entries +3 -0
  116. data/test/tests/nl/CVS/Repository +1 -0
  117. data/test/tests/nl/CVS/Root +1 -0
  118. data/test/tests/nl/output.txt +45669 -0
  119. data/test/tests/nl/voc.txt +45669 -0
  120. data/test/tests/no/CVS/Entries +3 -0
  121. data/test/tests/no/CVS/Repository +1 -0
  122. data/test/tests/no/CVS/Root +1 -0
  123. data/test/tests/no/output.txt +20628 -0
  124. data/test/tests/no/voc.txt +20628 -0
  125. data/test/tests/pt/CVS/Entries +3 -0
  126. data/test/tests/pt/CVS/Repository +1 -0
  127. data/test/tests/pt/CVS/Root +1 -0
  128. data/test/tests/pt/output.txt +32016 -0
  129. data/test/tests/pt/voc.txt +32016 -0
  130. data/test/tests/ru/CVS/Entries +3 -0
  131. data/test/tests/ru/CVS/Repository +1 -0
  132. data/test/tests/ru/CVS/Root +1 -0
  133. data/test/tests/ru/output.txt +49673 -0
  134. data/test/tests/ru/voc.txt +49673 -0
  135. data/test/tests/sv/CVS/Entries +3 -0
  136. data/test/tests/sv/CVS/Repository +1 -0
  137. data/test/tests/sv/CVS/Root +1 -0
  138. data/test/tests/sv/output.txt +30623 -0
  139. data/test/tests/sv/voc.txt +30623 -0
  140. metadata +221 -0
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * spanish_create_env(void);
9
+ extern void spanish_close_env(struct SN_env * z);
10
+
11
+ extern int spanish_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,299 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #include "../runtime/header.h"
5
+
6
+ extern int swedish_stem(struct SN_env * z);
7
+ static int r_other_suffix(struct SN_env * z);
8
+ static int r_consonant_pair(struct SN_env * z);
9
+ static int r_main_suffix(struct SN_env * z);
10
+ static int r_mark_regions(struct SN_env * z);
11
+
12
+ extern struct SN_env * swedish_create_env(void);
13
+ extern void swedish_close_env(struct SN_env * z);
14
+
15
+ static symbol s_0_0[1] = { 'a' };
16
+ static symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
17
+ static symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
18
+ static symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
19
+ static symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
20
+ static symbol s_0_5[2] = { 'a', 'd' };
21
+ static symbol s_0_6[1] = { 'e' };
22
+ static symbol s_0_7[3] = { 'a', 'd', 'e' };
23
+ static symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
24
+ static symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
25
+ static symbol s_0_10[3] = { 'a', 'r', 'e' };
26
+ static symbol s_0_11[4] = { 'a', 's', 't', 'e' };
27
+ static symbol s_0_12[2] = { 'e', 'n' };
28
+ static symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
29
+ static symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
30
+ static symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
31
+ static symbol s_0_16[3] = { 'e', 'r', 'n' };
32
+ static symbol s_0_17[2] = { 'a', 'r' };
33
+ static symbol s_0_18[2] = { 'e', 'r' };
34
+ static symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
35
+ static symbol s_0_20[2] = { 'o', 'r' };
36
+ static symbol s_0_21[1] = { 's' };
37
+ static symbol s_0_22[2] = { 'a', 's' };
38
+ static symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
39
+ static symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
40
+ static symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
41
+ static symbol s_0_26[2] = { 'e', 's' };
42
+ static symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
43
+ static symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
44
+ static symbol s_0_29[3] = { 'e', 'n', 's' };
45
+ static symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
46
+ static symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
47
+ static symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
48
+ static symbol s_0_33[2] = { 'a', 't' };
49
+ static symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
50
+ static symbol s_0_35[3] = { 'h', 'e', 't' };
51
+ static symbol s_0_36[3] = { 'a', 's', 't' };
52
+
53
+ static struct among a_0[37] =
54
+ {
55
+ /* 0 */ { 1, s_0_0, -1, 1, 0},
56
+ /* 1 */ { 4, s_0_1, 0, 1, 0},
57
+ /* 2 */ { 4, s_0_2, 0, 1, 0},
58
+ /* 3 */ { 7, s_0_3, 2, 1, 0},
59
+ /* 4 */ { 4, s_0_4, 0, 1, 0},
60
+ /* 5 */ { 2, s_0_5, -1, 1, 0},
61
+ /* 6 */ { 1, s_0_6, -1, 1, 0},
62
+ /* 7 */ { 3, s_0_7, 6, 1, 0},
63
+ /* 8 */ { 4, s_0_8, 6, 1, 0},
64
+ /* 9 */ { 4, s_0_9, 6, 1, 0},
65
+ /* 10 */ { 3, s_0_10, 6, 1, 0},
66
+ /* 11 */ { 4, s_0_11, 6, 1, 0},
67
+ /* 12 */ { 2, s_0_12, -1, 1, 0},
68
+ /* 13 */ { 5, s_0_13, 12, 1, 0},
69
+ /* 14 */ { 4, s_0_14, 12, 1, 0},
70
+ /* 15 */ { 5, s_0_15, 12, 1, 0},
71
+ /* 16 */ { 3, s_0_16, -1, 1, 0},
72
+ /* 17 */ { 2, s_0_17, -1, 1, 0},
73
+ /* 18 */ { 2, s_0_18, -1, 1, 0},
74
+ /* 19 */ { 5, s_0_19, 18, 1, 0},
75
+ /* 20 */ { 2, s_0_20, -1, 1, 0},
76
+ /* 21 */ { 1, s_0_21, -1, 2, 0},
77
+ /* 22 */ { 2, s_0_22, 21, 1, 0},
78
+ /* 23 */ { 5, s_0_23, 22, 1, 0},
79
+ /* 24 */ { 5, s_0_24, 22, 1, 0},
80
+ /* 25 */ { 5, s_0_25, 22, 1, 0},
81
+ /* 26 */ { 2, s_0_26, 21, 1, 0},
82
+ /* 27 */ { 4, s_0_27, 26, 1, 0},
83
+ /* 28 */ { 5, s_0_28, 26, 1, 0},
84
+ /* 29 */ { 3, s_0_29, 21, 1, 0},
85
+ /* 30 */ { 5, s_0_30, 29, 1, 0},
86
+ /* 31 */ { 6, s_0_31, 29, 1, 0},
87
+ /* 32 */ { 4, s_0_32, 21, 1, 0},
88
+ /* 33 */ { 2, s_0_33, -1, 1, 0},
89
+ /* 34 */ { 5, s_0_34, -1, 1, 0},
90
+ /* 35 */ { 3, s_0_35, -1, 1, 0},
91
+ /* 36 */ { 3, s_0_36, -1, 1, 0}
92
+ };
93
+
94
+ static symbol s_1_0[2] = { 'd', 'd' };
95
+ static symbol s_1_1[2] = { 'g', 'd' };
96
+ static symbol s_1_2[2] = { 'n', 'n' };
97
+ static symbol s_1_3[2] = { 'd', 't' };
98
+ static symbol s_1_4[2] = { 'g', 't' };
99
+ static symbol s_1_5[2] = { 'k', 't' };
100
+ static symbol s_1_6[2] = { 't', 't' };
101
+
102
+ static struct among a_1[7] =
103
+ {
104
+ /* 0 */ { 2, s_1_0, -1, -1, 0},
105
+ /* 1 */ { 2, s_1_1, -1, -1, 0},
106
+ /* 2 */ { 2, s_1_2, -1, -1, 0},
107
+ /* 3 */ { 2, s_1_3, -1, -1, 0},
108
+ /* 4 */ { 2, s_1_4, -1, -1, 0},
109
+ /* 5 */ { 2, s_1_5, -1, -1, 0},
110
+ /* 6 */ { 2, s_1_6, -1, -1, 0}
111
+ };
112
+
113
+ static symbol s_2_0[2] = { 'i', 'g' };
114
+ static symbol s_2_1[3] = { 'l', 'i', 'g' };
115
+ static symbol s_2_2[3] = { 'e', 'l', 's' };
116
+ static symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
117
+ static symbol s_2_4[4] = { 'l', 246, 's', 't' };
118
+
119
+ static struct among a_2[5] =
120
+ {
121
+ /* 0 */ { 2, s_2_0, -1, 1, 0},
122
+ /* 1 */ { 3, s_2_1, 0, 1, 0},
123
+ /* 2 */ { 3, s_2_2, -1, 1, 0},
124
+ /* 3 */ { 5, s_2_3, -1, 3, 0},
125
+ /* 4 */ { 4, s_2_4, -1, 2, 0}
126
+ };
127
+
128
+ static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
129
+
130
+ static unsigned char g_s_ending[] = { 119, 127, 149 };
131
+
132
+ static symbol s_0[] = { 'l', 246, 's' };
133
+ static symbol s_1[] = { 'f', 'u', 'l', 'l' };
134
+
135
+ static int r_mark_regions(struct SN_env * z) {
136
+ z->I[0] = z->l;
137
+ while(1) { /* goto, line 30 */
138
+ int c = z->c;
139
+ if (!(in_grouping(z, g_v, 97, 246))) goto lab0;
140
+ z->c = c;
141
+ break;
142
+ lab0:
143
+ z->c = c;
144
+ if (z->c >= z->l) return 0;
145
+ z->c++;
146
+ }
147
+ while(1) { /* gopast, line 30 */
148
+ if (!(out_grouping(z, g_v, 97, 246))) goto lab1;
149
+ break;
150
+ lab1:
151
+ if (z->c >= z->l) return 0;
152
+ z->c++;
153
+ }
154
+ z->I[0] = z->c; /* setmark p1, line 30 */
155
+ /* try, line 31 */
156
+ if (!(z->I[0] < 3)) goto lab2;
157
+ z->I[0] = 3;
158
+ lab2:
159
+ return 1;
160
+ }
161
+
162
+ static int r_main_suffix(struct SN_env * z) {
163
+ int among_var;
164
+ { int m3; /* setlimit, line 37 */
165
+ int m = z->l - z->c; (void) m;
166
+ if (z->c < z->I[0]) return 0;
167
+ z->c = z->I[0]; /* tomark, line 37 */
168
+ m3 = z->lb; z->lb = z->c;
169
+ z->c = z->l - m;
170
+ z->ket = z->c; /* [, line 37 */
171
+ among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
172
+ if (!(among_var)) { z->lb = m3; return 0; }
173
+ z->bra = z->c; /* ], line 37 */
174
+ z->lb = m3;
175
+ }
176
+ switch(among_var) {
177
+ case 0: return 0;
178
+ case 1:
179
+ { int ret;
180
+ ret = slice_del(z); /* delete, line 44 */
181
+ if (ret < 0) return ret;
182
+ }
183
+ break;
184
+ case 2:
185
+ if (!(in_grouping_b(z, g_s_ending, 98, 121))) return 0;
186
+ { int ret;
187
+ ret = slice_del(z); /* delete, line 46 */
188
+ if (ret < 0) return ret;
189
+ }
190
+ break;
191
+ }
192
+ return 1;
193
+ }
194
+
195
+ static int r_consonant_pair(struct SN_env * z) {
196
+ { int m3; /* setlimit, line 50 */
197
+ int m = z->l - z->c; (void) m;
198
+ if (z->c < z->I[0]) return 0;
199
+ z->c = z->I[0]; /* tomark, line 50 */
200
+ m3 = z->lb; z->lb = z->c;
201
+ z->c = z->l - m;
202
+ { int m = z->l - z->c; (void) m; /* and, line 52 */
203
+ if (!(find_among_b(z, a_1, 7))) { z->lb = m3; return 0; } /* among, line 51 */
204
+ z->c = z->l - m;
205
+ z->ket = z->c; /* [, line 52 */
206
+ if (z->c <= z->lb) { z->lb = m3; return 0; }
207
+ z->c--; /* next, line 52 */
208
+ z->bra = z->c; /* ], line 52 */
209
+ { int ret;
210
+ ret = slice_del(z); /* delete, line 52 */
211
+ if (ret < 0) return ret;
212
+ }
213
+ }
214
+ z->lb = m3;
215
+ }
216
+ return 1;
217
+ }
218
+
219
+ static int r_other_suffix(struct SN_env * z) {
220
+ int among_var;
221
+ { int m3; /* setlimit, line 55 */
222
+ int m = z->l - z->c; (void) m;
223
+ if (z->c < z->I[0]) return 0;
224
+ z->c = z->I[0]; /* tomark, line 55 */
225
+ m3 = z->lb; z->lb = z->c;
226
+ z->c = z->l - m;
227
+ z->ket = z->c; /* [, line 56 */
228
+ among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
229
+ if (!(among_var)) { z->lb = m3; return 0; }
230
+ z->bra = z->c; /* ], line 56 */
231
+ switch(among_var) {
232
+ case 0: { z->lb = m3; return 0; }
233
+ case 1:
234
+ { int ret;
235
+ ret = slice_del(z); /* delete, line 57 */
236
+ if (ret < 0) return ret;
237
+ }
238
+ break;
239
+ case 2:
240
+ { int ret;
241
+ ret = slice_from_s(z, 3, s_0); /* <-, line 58 */
242
+ if (ret < 0) return ret;
243
+ }
244
+ break;
245
+ case 3:
246
+ { int ret;
247
+ ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
248
+ if (ret < 0) return ret;
249
+ }
250
+ break;
251
+ }
252
+ z->lb = m3;
253
+ }
254
+ return 1;
255
+ }
256
+
257
+ extern int swedish_stem(struct SN_env * z) {
258
+ { int c = z->c; /* do, line 66 */
259
+ { int ret = r_mark_regions(z);
260
+ if (ret == 0) goto lab0; /* call mark_regions, line 66 */
261
+ if (ret < 0) return ret;
262
+ }
263
+ lab0:
264
+ z->c = c;
265
+ }
266
+ z->lb = z->c; z->c = z->l; /* backwards, line 67 */
267
+
268
+ { int m = z->l - z->c; (void) m; /* do, line 68 */
269
+ { int ret = r_main_suffix(z);
270
+ if (ret == 0) goto lab1; /* call main_suffix, line 68 */
271
+ if (ret < 0) return ret;
272
+ }
273
+ lab1:
274
+ z->c = z->l - m;
275
+ }
276
+ { int m = z->l - z->c; (void) m; /* do, line 69 */
277
+ { int ret = r_consonant_pair(z);
278
+ if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
279
+ if (ret < 0) return ret;
280
+ }
281
+ lab2:
282
+ z->c = z->l - m;
283
+ }
284
+ { int m = z->l - z->c; (void) m; /* do, line 70 */
285
+ { int ret = r_other_suffix(z);
286
+ if (ret == 0) goto lab3; /* call other_suffix, line 70 */
287
+ if (ret < 0) return ret;
288
+ }
289
+ lab3:
290
+ z->c = z->l - m;
291
+ }
292
+ z->c = z->lb;
293
+ return 1;
294
+ }
295
+
296
/*
 * Allocate a stemming environment for the Swedish stemmer.
 * NOTE(review): the arguments are presumably the counts of string, integer
 * and boolean variables; this stemmer only ever touches z->I[0] -- confirm
 * against SN_create_env() in the runtime.
 */
extern struct SN_env * swedish_create_env(void)
{
    return SN_create_env(0, 1, 0);
}
297
+
298
/* Release an environment obtained from swedish_create_env(). */
extern void swedish_close_env(struct SN_env * z)
{
    SN_close_env(z);
}
299
+
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * swedish_create_env(void);
9
+ extern void swedish_close_env(struct SN_env * z);
10
+
11
+ extern int swedish_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,146 @@
1
+ /*
2
+ * stemmer4r.c - A Ruby interface to the libstemmer library.
3
+ * http://snowball.tartarus.org/
4
+ *
5
+ * Copyright (c) Fabien POTENCIER 2005
6
+ *
7
+ */
8
+
9
+ #include "ruby.h"
10
+ #include "libstemmer.h"
11
+
12
+ /*
13
+ * Document-class: Stemmer
14
+ *
15
+ * == Summary
16
+ *
17
+ * Ruby extension for stemming.
18
+ *
19
+ * == Abstract
20
+ *
21
+ * Stemmer is a wrapper class for the <tt>libstemmer</tt> library,
22
+ * the snowball stemming algorithms.
23
+ *
24
+ */
25
+
26
+ static VALUE rb_cStemmer;
27
+
28
+ struct sb_stemmer { /* Stemmer handle wrapped by the Ruby object. NOTE(review): this looks like a local copy of libstemmer's private struct; no field is accessed in this file -- confirm it has to be declared here and stays in sync with libstemmer.c. */
29
+ struct SN_env * (*create)(void); /* same signature as the <language>_create_env functions */
30
+ void (*close)(struct SN_env *); /* same signature as the <language>_close_env functions */
31
+ int (*stem)(struct SN_env *); /* same signature as the <language>_stem functions */
32
+
33
+ struct SN_env * env; /* stemming environment handed to the three callbacks above (presumably; not used in this file) */
34
+ };
35
+
36
/*
 * GC mark callback handed to Data_Wrap_Struct.  The wrapped struct sb_stemmer
 * declared in this file holds only function pointers and an SN_env pointer,
 * no Ruby VALUEs, so there is nothing to mark.
 */
static void stemmer_mark(struct sb_stemmer *stemmer)
{
  /* intentionally empty */
}
40
+
41
/*
 * GC free callback handed to Data_Wrap_Struct: releases the libstemmer
 * handle created in stemmer_allocate().
 */
static void stemmer_free(struct sb_stemmer *stemmer)
{
  sb_stemmer_delete(stemmer);
}
46
+
47
+ /*
48
+ * Document-method: new
49
+ * call-seq: Stemmer.new(language)
50
+ *
51
+ * Creates new code stemmer for +language+.
52
+ *
53
+ * === Parameters
54
+ *
55
+ * +language+:: stemmer language (language ISO code are supported)
56
+ *
57
+ * Currently available languages:
58
+ *
59
+ * * english (en)
60
+ * * french (fr)
61
+ * * spanish (es)
62
+ * * potuguese (pt)
63
+ * * italian (it)
64
+ * * german (de)
65
+ * * dutch (nl)
66
+ * * swedish (se)
67
+ * * danish (da)
68
+ * * russian (ru)
69
+ * * finnish (fi)
70
+ *
71
+ * === Exceptions
72
+ *
73
+ * ArgumentError:: if we can't find an available stemmer for +language+
74
+ *
75
+ * === Examples
76
+ *
77
+ * stemmer = Stemmer.new('en')
78
+ * puts stemmer.stem('testing')
79
+ *
80
+ * stemmer = Stemmer.new('fr')
81
+ * puts stemmer.stem('�t�')
82
+ *
83
+ */
84
+ static VALUE
85
+ stemmer_allocate(VALUE klass, VALUE algorithm)
86
+ {
87
+ struct sb_stemmer* stemmer;
88
+ char * calgorithm;
89
+
90
+ Check_Type(algorithm, T_STRING);
91
+ calgorithm = strdup(STR2CSTR(algorithm));
92
+ if (!strcmp(calgorithm, "es"))
93
+ {
94
+ strcpy(calgorithm, "spanish");
95
+ }
96
+
97
+ stemmer = sb_stemmer_new(calgorithm);
98
+ free(calgorithm);
99
+
100
+ if (stemmer == NULL)
101
+ {
102
+ rb_raise(rb_eArgError, "Algorithm '%s' doesn't exist", STR2CSTR(algorithm));
103
+ }
104
+
105
+ return Data_Wrap_Struct(klass, stemmer_mark, stemmer_free, stemmer);
106
+ }
107
+
108
+ /*
109
+ * Document-method: stem
110
+ * call-seq: stem(str)
111
+ *
112
+ * Stems string and returns the result.
113
+ *
114
+ * === Parameters
115
+ *
116
+ * +str+:: string to stem. String must be encoding the 'right' way (iso-8859-1 for french for example).
117
+ */
118
+ static VALUE
119
+ stemmer_stem(VALUE self, VALUE word)
120
+ {
121
+ VALUE ret;
122
+ sb_symbol *cword;
123
+ const sb_symbol *rcword;
124
+ struct sb_stemmer *stemmer;
125
+
126
+ Check_Type(word, T_STRING);
127
+
128
+ cword = strdup(STR2CSTR(word));
129
+ Data_Get_Struct(self, struct sb_stemmer, stemmer);
130
+ rcword = sb_stemmer_stem(stemmer, cword, RSTRING(word)->len);
131
+
132
+ ret = rb_str_new2(rcword);
133
+ free(cword);
134
+
135
+ return ret;
136
+ }
137
+
138
+ void /* Extension entry point: defines the Stemmer class and registers its methods. */
139
+ Init_stemmer4r()
140
+ {
141
+ rb_cStemmer = rb_define_class("Stemmer", rb_cObject); /* top-level class Stemmer < Object */
142
+ rb_define_singleton_method(rb_cStemmer, "new", stemmer_allocate, 1); /* Stemmer.new(language) */
143
+ rb_define_method(rb_cStemmer, "stem", stemmer_stem, 1); /* Stemmer#stem(str) */
144
+ rb_define_method(rb_cStemmer, "list", stemmer_list, 0); /* NOTE(review): stemmer_list is neither defined nor declared anywhere in this file -- unless a header provides it this line will not compile; define it or drop the registration. */
145
+ }
146
+
data/stemmer4r.gemspec ADDED
@@ -0,0 +1,23 @@
1
+ require 'rubygems'
2
+
3
+ spec = Gem::Specification.new do |s|
4
+ s.name = 'stemmer4r'
5
+ s.version = '0.1'
6
+ s.author = "Fabien POTENCIER"
7
+ s.email = "fabien.potencier@gmail.com"
8
+ s.homepage = "http://stemmer4r.rubyforge.org"
9
+ s.rubyforge_project = "stemmer4r"
10
+ s.summary = <<-EOF
11
+ Stemmer4r is a Ruby extension that wraps the snowball stemmer library (libstemmer).
12
+ EOF
13
+ s.description = <<-EOF
14
+ Stemmer4r is a Ruby extension that wraps the snowball stemmer library (libstemmer).
15
+ EOF
16
+ s.files = Dir.glob("**/*").delete_if { |item| item.include?(".svn") }
17
+ s.extensions << "ext/stemmer4r/extconf.rb"
18
+ s.require_path = '.'
19
+ s.autorequire = 'stemmer4r'
20
+ s.has_rdoc = true
21
+ s.extra_rdoc_files = ["README"]
22
+ s.test_files = Dir.glob('test/*.rb')
23
+ end
data/test/CVS/Entries ADDED
@@ -0,0 +1,2 @@
1
+ /test.rb/1.1.1.1/Wed May 11 07:39:43 2005//
2
+ D/tests////
@@ -0,0 +1 @@
1
+ stemmer4r/test
data/test/CVS/Root ADDED
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
data/test/test.rb ADDED
@@ -0,0 +1,31 @@
1
#!/usr/bin/ruby -w

# Load RubyGems when it is installed so that an installed stemmer4r gem can be
# found; a plain require then works for both the gem and a locally built
# extension.  (The old `require_gem` call raises NoMethodError, not LoadError,
# on RubyGems versions that do not define it, so the fallback never ran.)
begin
  require 'rubygems'
rescue LoadError
  # RubyGems is not available; rely on the regular load path.
end
require 'stemmer4r'

require 'test/unit'

# Checks every bundled stemmer against its reference vocabulary.
class StemmerTest < Test::Unit::TestCase
  LANGUAGES = %w{da de nl en es fi fr it no pt ru sv}

  def test_valid_language
    assert_not_nil(Stemmer.new('fr'))
    assert_nothing_raised() { Stemmer.new('fr') }
    assert_raise(ArgumentError) { Stemmer.new('notavalidlanguage') }
  end

  # Stems each word of tests/<language>/voc.txt and compares the result with
  # the line at the same position in tests/<language>/output.txt.
  def test_stemmer
    dir = File.dirname(__FILE__)
    LANGUAGES.each do |language|
      puts "Testing '#{language}' algorithm..."
      stemmer = Stemmer.new(language)
      words = read_lines("#{dir}/tests/#{language}/voc.txt")
      stems = read_lines("#{dir}/tests/#{language}/output.txt")

      # A length mismatch used to surface as a NoMethodError on nil.
      assert_equal(words.size, stems.size,
                   "voc.txt and output.txt differ in length for '#{language}'")
      words.each_with_index do |word, index|
        assert_equal(stems[index], stemmer.stem(word))
      end
    end
  end

  private

  # Returns the lines of +path+ without their line terminators.
  #
  # Uses the non-destructive chomp: chomp! returns nil when a line has no
  # terminator (typically the last one), which passed nil on to Stemmer#stem.
  # The block form closes the file, and binary mode keeps the raw bytes
  # (the fixtures are presumably in per-language legacy encodings).
  def read_lines(path)
    File.open(path, 'rb') { |file| file.readlines.map { |line| line.chomp } }
  end
end
@@ -0,0 +1,12 @@
1
+ D/da////
2
+ D/de////
3
+ D/en////
4
+ D/es////
5
+ D/fi////
6
+ D/fr////
7
+ D/it////
8
+ D/nl////
9
+ D/no////
10
+ D/pt////
11
+ D/ru////
12
+ D/sv////
@@ -0,0 +1 @@
1
+ stemmer4r/test/tests
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,3 @@
1
+ /output.txt/1.1.1.1/Wed May 11 07:41:22 2005//
2
+ /voc.txt/1.1.1.1/Wed May 11 07:41:28 2005//
3
+ D
@@ -0,0 +1 @@
1
+ stemmer4r/test/tests/da
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r