stemmer4r 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CVS/Entries +5 -0
- data/CVS/Repository +1 -0
- data/CVS/Root +1 -0
- data/LICENSE +20 -0
- data/README +9 -0
- data/ext/CVS/Entries +1 -0
- data/ext/CVS/Repository +1 -0
- data/ext/CVS/Root +1 -0
- data/ext/stemmer4r/CVS/Entries +4 -0
- data/ext/stemmer4r/CVS/Repository +1 -0
- data/ext/stemmer4r/CVS/Root +1 -0
- data/ext/stemmer4r/depend +14 -0
- data/ext/stemmer4r/extconf.rb +8 -0
- data/ext/stemmer4r/libstemmer_c/CVS/Entries +7 -0
- data/ext/stemmer4r/libstemmer_c/CVS/Repository +1 -0
- data/ext/stemmer4r/libstemmer_c/CVS/Root +1 -0
- data/ext/stemmer4r/libstemmer_c/MANIFEST +39 -0
- data/ext/stemmer4r/libstemmer_c/Makefile +5 -0
- data/ext/stemmer4r/libstemmer_c/include/CVS/Entries +2 -0
- data/ext/stemmer4r/libstemmer_c/include/CVS/Repository +1 -0
- data/ext/stemmer4r/libstemmer_c/include/CVS/Root +1 -0
- data/ext/stemmer4r/libstemmer_c/include/libstemmer.h +63 -0
- data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Entries +3 -0
- data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Repository +1 -0
- data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Root +1 -0
- data/ext/stemmer4r/libstemmer_c/libstemmer/libstemmer.c +78 -0
- data/ext/stemmer4r/libstemmer_c/libstemmer/modules.h +96 -0
- data/ext/stemmer4r/libstemmer_c/mkinc.mak +42 -0
- data/ext/stemmer4r/libstemmer_c/runtime/CVS/Entries +5 -0
- data/ext/stemmer4r/libstemmer_c/runtime/CVS/Repository +1 -0
- data/ext/stemmer4r/libstemmer_c/runtime/CVS/Root +1 -0
- data/ext/stemmer4r/libstemmer_c/runtime/api.c +69 -0
- data/ext/stemmer4r/libstemmer_c/runtime/api.h +27 -0
- data/ext/stemmer4r/libstemmer_c/runtime/header.h +56 -0
- data/ext/stemmer4r/libstemmer_c/runtime/utilities.c +403 -0
- data/ext/stemmer4r/libstemmer_c/src_c/CVS/Entries +33 -0
- data/ext/stemmer4r/libstemmer_c/src_c/CVS/Repository +1 -0
- data/ext/stemmer4r/libstemmer_c/src_c/CVS/Root +1 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.c +330 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.c +635 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_english.c +1109 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_english.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.c +792 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_french.c +1276 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_french.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_german.c +504 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_german.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.c +549 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.c +1087 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.c +1780 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.c +1752 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.c +279 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.c +776 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.c +1027 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.c +701 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.c +1109 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.h +16 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.c +299 -0
- data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.h +16 -0
- data/ext/stemmer4r/stemmer4r.c +146 -0
- data/stemmer4r.gemspec +23 -0
- data/test/CVS/Entries +2 -0
- data/test/CVS/Repository +1 -0
- data/test/CVS/Root +1 -0
- data/test/test.rb +31 -0
- data/test/tests/CVS/Entries +12 -0
- data/test/tests/CVS/Repository +1 -0
- data/test/tests/CVS/Root +1 -0
- data/test/tests/da/CVS/Entries +3 -0
- data/test/tests/da/CVS/Repository +1 -0
- data/test/tests/da/CVS/Root +1 -0
- data/test/tests/da/output.txt +23829 -0
- data/test/tests/da/voc.txt +23829 -0
- data/test/tests/de/CVS/Entries +3 -0
- data/test/tests/de/CVS/Repository +1 -0
- data/test/tests/de/CVS/Root +1 -0
- data/test/tests/de/output.txt +35033 -0
- data/test/tests/de/voc.txt +35033 -0
- data/test/tests/en/CVS/Entries +3 -0
- data/test/tests/en/CVS/Repository +1 -0
- data/test/tests/en/CVS/Root +1 -0
- data/test/tests/en/output.txt +29400 -0
- data/test/tests/en/voc.txt +29400 -0
- data/test/tests/es/CVS/Entries +3 -0
- data/test/tests/es/CVS/Repository +1 -0
- data/test/tests/es/CVS/Root +1 -0
- data/test/tests/es/output.txt +28390 -0
- data/test/tests/es/voc.txt +28390 -0
- data/test/tests/fi/CVS/Entries +3 -0
- data/test/tests/fi/CVS/Repository +1 -0
- data/test/tests/fi/CVS/Root +1 -0
- data/test/tests/fi/output.txt +50000 -0
- data/test/tests/fi/voc.txt +50000 -0
- data/test/tests/fr/CVS/Entries +3 -0
- data/test/tests/fr/CVS/Repository +1 -0
- data/test/tests/fr/CVS/Root +1 -0
- data/test/tests/fr/output.txt +20403 -0
- data/test/tests/fr/voc.txt +20403 -0
- data/test/tests/it/CVS/Entries +3 -0
- data/test/tests/it/CVS/Repository +1 -0
- data/test/tests/it/CVS/Root +1 -0
- data/test/tests/it/output.txt +35494 -0
- data/test/tests/it/voc.txt +35494 -0
- data/test/tests/nl/CVS/Entries +3 -0
- data/test/tests/nl/CVS/Repository +1 -0
- data/test/tests/nl/CVS/Root +1 -0
- data/test/tests/nl/output.txt +45669 -0
- data/test/tests/nl/voc.txt +45669 -0
- data/test/tests/no/CVS/Entries +3 -0
- data/test/tests/no/CVS/Repository +1 -0
- data/test/tests/no/CVS/Root +1 -0
- data/test/tests/no/output.txt +20628 -0
- data/test/tests/no/voc.txt +20628 -0
- data/test/tests/pt/CVS/Entries +3 -0
- data/test/tests/pt/CVS/Repository +1 -0
- data/test/tests/pt/CVS/Root +1 -0
- data/test/tests/pt/output.txt +32016 -0
- data/test/tests/pt/voc.txt +32016 -0
- data/test/tests/ru/CVS/Entries +3 -0
- data/test/tests/ru/CVS/Repository +1 -0
- data/test/tests/ru/CVS/Root +1 -0
- data/test/tests/ru/output.txt +49673 -0
- data/test/tests/ru/voc.txt +49673 -0
- data/test/tests/sv/CVS/Entries +3 -0
- data/test/tests/sv/CVS/Repository +1 -0
- data/test/tests/sv/CVS/Root +1 -0
- data/test/tests/sv/output.txt +30623 -0
- data/test/tests/sv/voc.txt +30623 -0
- metadata +221 -0
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
|
|
2
|
+
#include <stdio.h>
|
|
3
|
+
#include <stdlib.h>
|
|
4
|
+
#include <string.h>
|
|
5
|
+
|
|
6
|
+
#include "header.h"
|
|
7
|
+
|
|
8
|
+
/* unless(C) expands to if(!(C)): the statement that follows runs only when C is false. */
#define unless(C) if(!(C))
|
|
9
|
+
|
|
10
|
+
/* Symbol count used by create_s() for both the initial capacity and the initial size. */
#define CREATE_SIZE 1
|
|
11
|
+
|
|
12
|
+
extern symbol * create_s(void)
|
|
13
|
+
{
|
|
14
|
+
symbol * p;
|
|
15
|
+
void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
|
|
16
|
+
if (mem == NULL) return NULL;
|
|
17
|
+
p = (symbol *) (HEAD + (char *) mem);
|
|
18
|
+
CAPACITY(p) = CREATE_SIZE;
|
|
19
|
+
SET_SIZE(p, CREATE_SIZE);
|
|
20
|
+
return p;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/* Release a symbol buffer.  A NULL argument is ignored.
 *
 * The pointer is stepped back by HEAD bytes before being passed to free(),
 * undoing the offset that create_s() applies to the address it returns.
 */
extern void lose_s(symbol * p)
{
    if (p != NULL) {
        free((char *) p - HEAD);
    }
}
|
|
28
|
+
|
|
29
|
+
extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max)
|
|
30
|
+
{ if (z->c >= z->l) return 0;
|
|
31
|
+
{ int ch = z->p[z->c];
|
|
32
|
+
if
|
|
33
|
+
(ch > max || (ch -= min) < 0 ||
|
|
34
|
+
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
|
35
|
+
}
|
|
36
|
+
z->c++; return 1;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
|
|
40
|
+
{ if (z->c <= z->lb) return 0;
|
|
41
|
+
{ int ch = z->p[z->c - 1];
|
|
42
|
+
if
|
|
43
|
+
(ch > max || (ch -= min) < 0 ||
|
|
44
|
+
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
|
45
|
+
}
|
|
46
|
+
z->c--; return 1;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max)
|
|
50
|
+
{ if (z->c >= z->l) return 0;
|
|
51
|
+
{ int ch = z->p[z->c];
|
|
52
|
+
unless
|
|
53
|
+
(ch > max || (ch -= min) < 0 ||
|
|
54
|
+
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
|
55
|
+
}
|
|
56
|
+
z->c++; return 1;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max)
|
|
60
|
+
{ if (z->c <= z->lb) return 0;
|
|
61
|
+
{ int ch = z->p[z->c - 1];
|
|
62
|
+
unless
|
|
63
|
+
(ch > max || (ch -= min) < 0 ||
|
|
64
|
+
(s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0;
|
|
65
|
+
}
|
|
66
|
+
z->c--; return 1;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
extern int in_range(struct SN_env * z, int min, int max)
|
|
71
|
+
{ if (z->c >= z->l) return 0;
|
|
72
|
+
{ int ch = z->p[z->c];
|
|
73
|
+
if
|
|
74
|
+
(ch > max || ch < min) return 0;
|
|
75
|
+
}
|
|
76
|
+
z->c++; return 1;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
extern int in_range_b(struct SN_env * z, int min, int max)
|
|
80
|
+
{ if (z->c <= z->lb) return 0;
|
|
81
|
+
{ int ch = z->p[z->c - 1];
|
|
82
|
+
if
|
|
83
|
+
(ch > max || ch < min) return 0;
|
|
84
|
+
}
|
|
85
|
+
z->c--; return 1;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
extern int out_range(struct SN_env * z, int min, int max)
|
|
89
|
+
{ if (z->c >= z->l) return 0;
|
|
90
|
+
{ int ch = z->p[z->c];
|
|
91
|
+
unless
|
|
92
|
+
(ch > max || ch < min) return 0;
|
|
93
|
+
}
|
|
94
|
+
z->c++; return 1;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
extern int out_range_b(struct SN_env * z, int min, int max)
|
|
98
|
+
{ if (z->c <= z->lb) return 0;
|
|
99
|
+
{ int ch = z->p[z->c - 1];
|
|
100
|
+
unless
|
|
101
|
+
(ch > max || ch < min) return 0;
|
|
102
|
+
}
|
|
103
|
+
z->c--; return 1;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/* Forward literal match.
 *
 * Compares the s_size symbols starting at the cursor with the s_size
 * symbols at s.  On a match the cursor advances by s_size and 1 is
 * returned.  Returns 0 (cursor untouched) when fewer than s_size symbols
 * remain before z->l or the symbols differ.
 */
extern int eq_s(struct SN_env * z, int s_size, symbol * s)
{
    if (z->l - z->c < s_size) {
        return 0;
    }
    if (memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) {
        return 0;
    }

    z->c += s_size;
    return 1;
}
|
|
111
|
+
|
|
112
|
+
/* Backward literal match.
 *
 * Compares the s_size symbols ending at the cursor with the s_size symbols
 * at s.  On a match the cursor moves back by s_size and 1 is returned.
 * Returns 0 (cursor untouched) when fewer than s_size symbols lie between
 * z->lb and the cursor, or the symbols differ.
 */
extern int eq_s_b(struct SN_env * z, int s_size, symbol * s)
{
    if (z->c - z->lb < s_size) {
        return 0;
    }
    if (memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) {
        return 0;
    }

    z->c -= s_size;
    return 1;
}
|
|
117
|
+
|
|
118
|
+
/* Forward match against a symbol buffer: same as eq_s() with the length
 * taken from SIZE(p). */
extern int eq_v(struct SN_env * z, symbol * p)
{
    int length = SIZE(p);

    return eq_s(z, length, p);
}
|
|
121
|
+
|
|
122
|
+
/* Backward match against a symbol buffer: same as eq_s_b() with the length
 * taken from SIZE(p). */
extern int eq_v_b(struct SN_env * z, symbol * p)
{
    int length = SIZE(p);

    return eq_s_b(z, length, p);
}
|
|
125
|
+
|
|
126
|
+
extern int find_among(struct SN_env * z, struct among * v, int v_size)
|
|
127
|
+
{
|
|
128
|
+
int i = 0;
|
|
129
|
+
int j = v_size;
|
|
130
|
+
|
|
131
|
+
int c = z->c; int l = z->l;
|
|
132
|
+
symbol * q = z->p + c;
|
|
133
|
+
|
|
134
|
+
struct among * w;
|
|
135
|
+
|
|
136
|
+
int common_i = 0;
|
|
137
|
+
int common_j = 0;
|
|
138
|
+
|
|
139
|
+
int first_key_inspected = 0;
|
|
140
|
+
|
|
141
|
+
while(1)
|
|
142
|
+
{ int k = i + ((j - i) >> 1);
|
|
143
|
+
int diff = 0;
|
|
144
|
+
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
|
145
|
+
w = v + k;
|
|
146
|
+
{ int i; for (i = common; i < w->s_size; i++)
|
|
147
|
+
{ if (c + common == l) { diff = -1; break; }
|
|
148
|
+
diff = q[common] - w->s[i];
|
|
149
|
+
if (diff != 0) break;
|
|
150
|
+
common++;
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
if (diff < 0) { j = k; common_j = common; }
|
|
154
|
+
else { i = k; common_i = common; }
|
|
155
|
+
if (j - i <= 1)
|
|
156
|
+
{ if (i > 0) break; /* v->s has been inspected */
|
|
157
|
+
if (j == i) break; /* only one item in v */
|
|
158
|
+
|
|
159
|
+
/* - but now we need to go round once more to get
|
|
160
|
+
v->s inspected. This looks messy, but is actually
|
|
161
|
+
the optimal approach. */
|
|
162
|
+
|
|
163
|
+
if (first_key_inspected) break;
|
|
164
|
+
first_key_inspected = 1;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
while(1)
|
|
168
|
+
{ w = v + i;
|
|
169
|
+
if (common_i >= w->s_size)
|
|
170
|
+
{ z->c = c + w->s_size;
|
|
171
|
+
if (w->function == 0) return w->result;
|
|
172
|
+
{ int res = w->function(z);
|
|
173
|
+
z->c = c + w->s_size;
|
|
174
|
+
if (res) return w->result;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
i = w->substring_i;
|
|
178
|
+
if (i < 0) return 0;
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/* find_among_b is for backwards processing. Same comments apply */
|
|
183
|
+
|
|
184
|
+
extern int find_among_b(struct SN_env * z, struct among * v, int v_size)
|
|
185
|
+
{
|
|
186
|
+
int i = 0;
|
|
187
|
+
int j = v_size;
|
|
188
|
+
|
|
189
|
+
int c = z->c; int lb = z->lb;
|
|
190
|
+
symbol * q = z->p + c - 1;
|
|
191
|
+
|
|
192
|
+
struct among * w;
|
|
193
|
+
|
|
194
|
+
int common_i = 0;
|
|
195
|
+
int common_j = 0;
|
|
196
|
+
|
|
197
|
+
int first_key_inspected = 0;
|
|
198
|
+
|
|
199
|
+
while(1)
|
|
200
|
+
{ int k = i + ((j - i) >> 1);
|
|
201
|
+
int diff = 0;
|
|
202
|
+
int common = common_i < common_j ? common_i : common_j;
|
|
203
|
+
w = v + k;
|
|
204
|
+
{ int i; for (i = w->s_size - 1 - common; i >= 0; i--)
|
|
205
|
+
{ if (c - common == lb) { diff = -1; break; }
|
|
206
|
+
diff = q[- common] - w->s[i];
|
|
207
|
+
if (diff != 0) break;
|
|
208
|
+
common++;
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
if (diff < 0) { j = k; common_j = common; }
|
|
212
|
+
else { i = k; common_i = common; }
|
|
213
|
+
if (j - i <= 1)
|
|
214
|
+
{ if (i > 0) break;
|
|
215
|
+
if (j == i) break;
|
|
216
|
+
if (first_key_inspected) break;
|
|
217
|
+
first_key_inspected = 1;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
while(1)
|
|
221
|
+
{ w = v + i;
|
|
222
|
+
if (common_i >= w->s_size)
|
|
223
|
+
{ z->c = c - w->s_size;
|
|
224
|
+
if (w->function == 0) return w->result;
|
|
225
|
+
{ int res = w->function(z);
|
|
226
|
+
z->c = c - w->s_size;
|
|
227
|
+
if (res) return w->result;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
i = w->substring_i;
|
|
231
|
+
if (i < 0) return 0;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
/* Increase the size of the buffer pointed to by p to at least n bytes.
|
|
237
|
+
* If insufficient memory, returns NULL and frees the old buffer.
|
|
238
|
+
*/
|
|
239
|
+
static symbol * increase_size(symbol * p, int n)
|
|
240
|
+
{
|
|
241
|
+
symbol * q;
|
|
242
|
+
int new_size = n + 20;
|
|
243
|
+
void * mem = realloc((char *) p - HEAD,
|
|
244
|
+
HEAD + (new_size + 1) * sizeof(symbol));
|
|
245
|
+
if (mem == NULL)
|
|
246
|
+
{
|
|
247
|
+
lose_s(p);
|
|
248
|
+
return NULL;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
q = (symbol *) (HEAD + (char *)mem);
|
|
252
|
+
CAPACITY(q) = new_size;
|
|
253
|
+
return q;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/* to replace symbols between c_bra and c_ket in z->p by the
|
|
257
|
+
s_size symbols at s.
|
|
258
|
+
Returns 0 on success, -1 on error.
|
|
259
|
+
Also, frees z->p (and sets it to NULL) on error.
|
|
260
|
+
*/
|
|
261
|
+
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
|
|
262
|
+
{
|
|
263
|
+
int adjustment;
|
|
264
|
+
int len;
|
|
265
|
+
if (z->p == NULL) {
|
|
266
|
+
z->p = create_s();
|
|
267
|
+
if (z->p == NULL) return -1;
|
|
268
|
+
}
|
|
269
|
+
adjustment = s_size - (c_ket - c_bra);
|
|
270
|
+
len = SIZE(z->p);
|
|
271
|
+
if (adjustment != 0)
|
|
272
|
+
{
|
|
273
|
+
if (adjustment + len > CAPACITY(z->p))
|
|
274
|
+
{
|
|
275
|
+
z->p = increase_size(z->p, adjustment + len);
|
|
276
|
+
if (z->p == NULL) return -1;
|
|
277
|
+
}
|
|
278
|
+
memmove(z->p + c_ket + adjustment,
|
|
279
|
+
z->p + c_ket,
|
|
280
|
+
(len - c_ket) * sizeof(symbol));
|
|
281
|
+
SET_SIZE(z->p, adjustment + len);
|
|
282
|
+
z->l += adjustment;
|
|
283
|
+
if (z->c >= c_ket)
|
|
284
|
+
z->c += adjustment;
|
|
285
|
+
else
|
|
286
|
+
if (z->c > c_bra)
|
|
287
|
+
z->c = c_bra;
|
|
288
|
+
}
|
|
289
|
+
unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
|
290
|
+
if (adjptr != NULL)
|
|
291
|
+
*adjptr = adjustment;
|
|
292
|
+
return 0;
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
static int slice_check(struct SN_env * z)
|
|
296
|
+
{
|
|
297
|
+
if (z->bra < 0 ||
|
|
298
|
+
z->bra > z->ket ||
|
|
299
|
+
z->ket > z->l ||
|
|
300
|
+
z->p == NULL ||
|
|
301
|
+
z->l > SIZE(z->p)) /* this line could be removed */
|
|
302
|
+
{
|
|
303
|
+
#if 0
|
|
304
|
+
fprintf(stderr, "faulty slice operation:\n");
|
|
305
|
+
debug(z, -1, 0);
|
|
306
|
+
#endif
|
|
307
|
+
return -1;
|
|
308
|
+
}
|
|
309
|
+
return 0;
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
/* Replace the current slice [z->bra, z->ket) with the s_size symbols at s.
 *
 * Returns -1 when slice_check() rejects the slice; otherwise returns
 * whatever replace_s() returns (0 on success, -1 on error).
 */
extern int slice_from_s(struct SN_env * z, int s_size, symbol * s)
{
    if (slice_check(z) != 0) {
        return -1;
    }
    return replace_s(z, z->bra, z->ket, s_size, s, NULL);
}
|
|
317
|
+
|
|
318
|
+
/* Replace the current slice with the contents of the symbol buffer p:
 * same as slice_from_s() with the length taken from SIZE(p). */
extern int slice_from_v(struct SN_env * z, symbol * p)
{
    int length = SIZE(p);

    return slice_from_s(z, length, p);
}
|
|
322
|
+
|
|
323
|
+
/* Delete the current slice by replacing it with zero symbols.
 * Returns the result of slice_from_s(). */
extern int slice_del(struct SN_env * z)
{
    return slice_from_s(z, 0, NULL);
}
|
|
327
|
+
|
|
328
|
+
/* Replace the symbols between bra and ket with the s_size symbols at s and
 * keep the slice markers consistent.
 *
 * After a successful replace_s(), z->bra and z->ket are each shifted by the
 * resulting length difference when bra is at or before them.
 *
 * Returns 0 on success, -1 when replace_s() fails.
 */
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s)
{
    int shift;

    if (replace_s(z, bra, ket, s_size, s, &shift) != 0) {
        return -1;
    }

    if (bra <= z->bra) {
        z->bra += shift;
    }
    if (bra <= z->ket) {
        z->ket += shift;
    }
    return 0;
}
|
|
337
|
+
|
|
338
|
+
/* Replace the symbols between bra and ket with the contents of the symbol
 * buffer p and keep the slice markers consistent.
 *
 * Same as insert_s() with the length taken from SIZE(p): z->bra and z->ket
 * are shifted by the length difference when bra is at or before them.
 *
 * Returns 0 on success, -1 when the replacement fails.
 */
extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p)
{
    return insert_s(z, bra, ket, SIZE(p), p);
}
|
|
347
|
+
|
|
348
|
+
/* Copy the current slice [z->bra, z->ket) into the symbol buffer p.
 *
 * p is grown with increase_size() when its capacity is smaller than the
 * slice, so the returned pointer may differ from the one passed in.  The
 * recorded size of the result is set to the slice length.
 *
 * Returns the buffer holding the copy, or NULL on failure.  On failure p
 * has been freed: by lose_s() here when slice_check() rejects the slice, or
 * inside increase_size() when reallocation fails.
 */
extern symbol * slice_to(struct SN_env * z, symbol * p)
{
    int span;

    if (slice_check(z) != 0) {
        lose_s(p);
        return NULL;
    }

    span = z->ket - z->bra;
    if (CAPACITY(p) < span) {
        p = increase_size(p, span);
        if (p == NULL) {
            return NULL;
        }
    }

    memmove(p, z->p + z->bra, span * sizeof(symbol));
    SET_SIZE(p, span);
    return p;
}
|
|
368
|
+
|
|
369
|
+
/* Copy the first z->l symbols of z->p into the symbol buffer p.
 *
 * p is grown with increase_size() when its capacity is smaller than z->l,
 * so the returned pointer may differ from the one passed in.  The recorded
 * size of the result is set to z->l.
 *
 * Returns the buffer holding the copy, or NULL when growing fails (in which
 * case increase_size() has already freed p).
 */
extern symbol * assign_to(struct SN_env * z, symbol * p)
{
    int count = z->l;

    if (CAPACITY(p) < count) {
        p = increase_size(p, count);
        if (p == NULL) {
            return NULL;
        }
    }

    memmove(p, z->p, count * sizeof(symbol));
    SET_SIZE(p, count);
    return p;
}
|
|
382
|
+
|
|
383
|
+
#if 0
/* Disabled debugging aid: print the buffer z->p between single quotes with
 * the environment's positions marked inline - '{' at lb, '[' at bra, '|' at
 * the cursor, ']' at ket and '}' at l.  Zero symbols are shown as '#'.
 * When number is non-negative, a prefix with number, line_count and the
 * buffer size is printed first.
 */
extern void debug(struct SN_env * z, int number, int line_count)
{
    int limit = SIZE(z->p);
    int pos;

    if (number >= 0) {
        printf("%3d (line %4d): [%d]'", number, line_count, limit);
    }

    /* pos runs one past the last symbol so markers at the very end show. */
    for (pos = 0; pos <= limit; pos++) {
        if (pos == z->lb) printf("{");
        if (pos == z->bra) printf("[");
        if (pos == z->c) printf("|");
        if (pos == z->ket) printf("]");
        if (pos == z->l) printf("}");

        if (pos < limit) {
            int ch = z->p[pos];

            if (ch == 0) {
                ch = '#';
            }
            printf("%c", ch);
        }
    }
    printf("'\n");
}
#endif
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/stem_danish.c/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
2
|
+
/stem_danish.h/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
3
|
+
/stem_dutch.c/1.1.1.1/Wed May 11 07:39:37 2005//
|
|
4
|
+
/stem_dutch.h/1.1.1.1/Wed May 11 07:39:36 2005//
|
|
5
|
+
/stem_english.c/1.1.1.1/Wed May 11 07:39:30 2005//
|
|
6
|
+
/stem_english.h/1.1.1.1/Wed May 11 07:39:32 2005//
|
|
7
|
+
/stem_finnish.c/1.1.1.1/Wed May 11 07:39:34 2005//
|
|
8
|
+
/stem_finnish.h/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
9
|
+
/stem_french.c/1.1.1.1/Wed May 11 07:39:38 2005//
|
|
10
|
+
/stem_french.h/1.1.1.1/Wed May 11 07:39:27 2005//
|
|
11
|
+
/stem_german.c/1.1.1.1/Wed May 11 07:39:33 2005//
|
|
12
|
+
/stem_german.h/1.1.1.1/Wed May 11 07:39:34 2005//
|
|
13
|
+
/stem_german2.c/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
14
|
+
/stem_german2.h/1.1.1.1/Wed May 11 07:39:42 2005//
|
|
15
|
+
/stem_italian.c/1.1.1.1/Wed May 11 07:39:28 2005//
|
|
16
|
+
/stem_italian.h/1.1.1.1/Wed May 11 07:39:38 2005//
|
|
17
|
+
/stem_kraaij_pohlmann.c/1.1.1.1/Wed May 11 07:39:42 2005//
|
|
18
|
+
/stem_kraaij_pohlmann.h/1.1.1.1/Wed May 11 07:39:32 2005//
|
|
19
|
+
/stem_lovins.c/1.1.1.1/Wed May 11 07:39:36 2005//
|
|
20
|
+
/stem_lovins.h/1.1.1.1/Wed May 11 07:39:30 2005//
|
|
21
|
+
/stem_norwegian.c/1.1.1.1/Wed May 11 07:39:39 2005//
|
|
22
|
+
/stem_norwegian.h/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
23
|
+
/stem_porter.c/1.1.1.1/Wed May 11 07:39:33 2005//
|
|
24
|
+
/stem_porter.h/1.1.1.1/Wed May 11 07:39:43 2005//
|
|
25
|
+
/stem_portuguese.c/1.1.1.1/Wed May 11 07:39:31 2005//
|
|
26
|
+
/stem_portuguese.h/1.1.1.1/Wed May 11 07:39:40 2005//
|
|
27
|
+
/stem_russian.c/1.1.1.1/Wed May 11 07:39:43 2005//
|
|
28
|
+
/stem_russian.h/1.1.1.1/Wed May 11 07:39:39 2005//
|
|
29
|
+
/stem_spanish.c/1.1.1.1/Wed May 11 07:39:32 2005//
|
|
30
|
+
/stem_spanish.h/1.1.1.1/Wed May 11 07:39:42 2005//
|
|
31
|
+
/stem_swedish.c/1.1.1.1/Wed May 11 07:39:37 2005//
|
|
32
|
+
/stem_swedish.h/1.1.1.1/Wed May 11 07:39:43 2005//
|
|
33
|
+
D
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
stemmer4r/ext/stemmer4r/libstemmer_c/src_c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
:ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
|