isomorfeus-ferret 0.12.4 → 0.12.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +612 -612
- data/README.md +80 -48
- data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
- data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +7 -7
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
- data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
- data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
- data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
- data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
- data/ext/isomorfeus_ferret_ext/test.c +7 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +39 -4
- data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -5,58 +5,66 @@
|
|
5
5
|
|
6
6
|
#include "stem_header.h"
|
7
7
|
|
8
|
-
#define unless(C) if(!(C))
|
9
|
-
|
10
8
|
#define CREATE_SIZE 1
|
11
9
|
|
12
|
-
symbol * create_s(void) {
|
10
|
+
extern symbol * create_s(void) {
|
13
11
|
symbol * p;
|
14
12
|
void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol));
|
15
13
|
if (mem == NULL) return NULL;
|
16
14
|
p = (symbol *) (HEAD + (char *) mem);
|
17
15
|
CAPACITY(p) = CREATE_SIZE;
|
18
|
-
SET_SIZE(p,
|
16
|
+
SET_SIZE(p, 0);
|
19
17
|
return p;
|
20
18
|
}
|
21
19
|
|
22
|
-
void lose_s(symbol * p) {
|
20
|
+
extern void lose_s(symbol * p) {
|
23
21
|
if (p == NULL) return;
|
24
22
|
free((char *) p - HEAD);
|
25
23
|
}
|
26
24
|
|
27
25
|
/*
|
28
|
-
new_p = skip_utf8(p, c,
|
29
|
-
|
30
|
-
position, or 0 on failure.
|
26
|
+
new_p = skip_utf8(p, c, l, n); skips n characters forwards from p + c.
|
27
|
+
new_p is the new position, or -1 on failure.
|
31
28
|
|
32
29
|
-- used to implement hop and next in the utf8 case.
|
33
30
|
*/
|
34
31
|
|
35
|
-
int skip_utf8(const symbol * p, int c, int
|
32
|
+
extern int skip_utf8(const symbol * p, int c, int limit, int n) {
|
36
33
|
int b;
|
37
|
-
if (n
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
}
|
34
|
+
if (n < 0) return -1;
|
35
|
+
for (; n > 0; n--) {
|
36
|
+
if (c >= limit) return -1;
|
37
|
+
b = p[c++];
|
38
|
+
if (b >= 0xC0) { /* 1100 0000 */
|
39
|
+
while (c < limit) {
|
40
|
+
b = p[c];
|
41
|
+
if (b >= 0xC0 || b < 0x80) break;
|
42
|
+
/* break unless b is 10------ */
|
43
|
+
c++;
|
48
44
|
}
|
49
45
|
}
|
50
|
-
}
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
46
|
+
}
|
47
|
+
return c;
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
new_p = skip_b_utf8(p, c, lb, n); skips n characters backwards from p + c - 1
|
52
|
+
new_p is the new position, or -1 on failure.
|
53
|
+
|
54
|
+
-- used to implement hop and next in the utf8 case.
|
55
|
+
*/
|
56
|
+
|
57
|
+
extern int skip_b_utf8(const symbol * p, int c, int limit, int n) {
|
58
|
+
int b;
|
59
|
+
if (n < 0) return -1;
|
60
|
+
for (; n > 0; n--) {
|
61
|
+
if (c <= limit) return -1;
|
62
|
+
b = p[--c];
|
63
|
+
if (b >= 0x80) { /* 1000 0000 */
|
64
|
+
while (c > limit) {
|
65
|
+
b = p[c];
|
66
|
+
if (b >= 0xC0) break; /* 1100 0000 */
|
67
|
+
c--;
|
60
68
|
}
|
61
69
|
}
|
62
70
|
}
|
@@ -66,156 +74,174 @@ int skip_utf8(const symbol * p, int c, int lb, int l, int n) {
|
|
66
74
|
/* Code for character groupings: utf8 cases */
|
67
75
|
|
68
76
|
static int get_utf8(const symbol * p, int c, int l, int * slot) {
|
69
|
-
int b0, b1;
|
77
|
+
int b0, b1, b2;
|
70
78
|
if (c >= l) return 0;
|
71
79
|
b0 = p[c++];
|
72
80
|
if (b0 < 0xC0 || c == l) { /* 1100 0000 */
|
73
|
-
*
|
81
|
+
*slot = b0;
|
82
|
+
return 1;
|
74
83
|
}
|
75
|
-
b1 = p[c++];
|
84
|
+
b1 = p[c++] & 0x3F;
|
76
85
|
if (b0 < 0xE0 || c == l) { /* 1110 0000 */
|
77
|
-
*
|
86
|
+
*slot = (b0 & 0x1F) << 6 | b1;
|
87
|
+
return 2;
|
88
|
+
}
|
89
|
+
b2 = p[c++] & 0x3F;
|
90
|
+
if (b0 < 0xF0 || c == l) { /* 1111 0000 */
|
91
|
+
*slot = (b0 & 0xF) << 12 | b1 << 6 | b2;
|
92
|
+
return 3;
|
78
93
|
}
|
79
|
-
*
|
94
|
+
*slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F);
|
95
|
+
return 4;
|
80
96
|
}
|
81
97
|
|
82
98
|
static int get_b_utf8(const symbol * p, int c, int lb, int * slot) {
|
83
|
-
int
|
99
|
+
int a, b;
|
84
100
|
if (c <= lb) return 0;
|
85
|
-
|
86
|
-
if (
|
87
|
-
*
|
101
|
+
b = p[--c];
|
102
|
+
if (b < 0x80 || c == lb) { /* 1000 0000 */
|
103
|
+
*slot = b;
|
104
|
+
return 1;
|
88
105
|
}
|
89
|
-
|
90
|
-
|
91
|
-
|
106
|
+
a = b & 0x3F;
|
107
|
+
b = p[--c];
|
108
|
+
if (b >= 0xC0 || c == lb) { /* 1100 0000 */
|
109
|
+
*slot = (b & 0x1F) << 6 | a;
|
110
|
+
return 2;
|
92
111
|
}
|
93
|
-
|
112
|
+
a |= (b & 0x3F) << 6;
|
113
|
+
b = p[--c];
|
114
|
+
if (b >= 0xE0 || c == lb) { /* 1110 0000 */
|
115
|
+
*slot = (b & 0xF) << 12 | a;
|
116
|
+
return 3;
|
117
|
+
}
|
118
|
+
*slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a;
|
119
|
+
return 4;
|
94
120
|
}
|
95
121
|
|
96
|
-
int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
122
|
+
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
97
123
|
do {
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
124
|
+
int ch;
|
125
|
+
int w = get_utf8(z->p, z->c, z->l, & ch);
|
126
|
+
if (!w) return -1;
|
127
|
+
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
128
|
+
return w;
|
129
|
+
z->c += w;
|
104
130
|
} while (repeat);
|
105
131
|
return 0;
|
106
132
|
}
|
107
133
|
|
108
|
-
int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
134
|
+
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
109
135
|
do {
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
136
|
+
int ch;
|
137
|
+
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
|
138
|
+
if (!w) return -1;
|
139
|
+
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
140
|
+
return w;
|
141
|
+
z->c -= w;
|
116
142
|
} while (repeat);
|
117
143
|
return 0;
|
118
144
|
}
|
119
145
|
|
120
|
-
int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
146
|
+
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
121
147
|
do {
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
148
|
+
int ch;
|
149
|
+
int w = get_utf8(z->p, z->c, z->l, & ch);
|
150
|
+
if (!w) return -1;
|
151
|
+
if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
|
152
|
+
return w;
|
153
|
+
z->c += w;
|
128
154
|
} while (repeat);
|
129
155
|
return 0;
|
130
156
|
}
|
131
157
|
|
132
|
-
int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
158
|
+
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
133
159
|
do {
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
160
|
+
int ch;
|
161
|
+
int w = get_b_utf8(z->p, z->c, z->lb, & ch);
|
162
|
+
if (!w) return -1;
|
163
|
+
if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
|
164
|
+
return w;
|
165
|
+
z->c -= w;
|
140
166
|
} while (repeat);
|
141
167
|
return 0;
|
142
168
|
}
|
143
169
|
|
144
170
|
/* Code for character groupings: non-utf8 cases */
|
145
171
|
|
146
|
-
int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
172
|
+
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
147
173
|
do {
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
174
|
+
int ch;
|
175
|
+
if (z->c >= z->l) return -1;
|
176
|
+
ch = z->p[z->c];
|
177
|
+
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
178
|
+
return 1;
|
179
|
+
z->c++;
|
154
180
|
} while (repeat);
|
155
181
|
return 0;
|
156
182
|
}
|
157
183
|
|
158
|
-
int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
184
|
+
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
159
185
|
do {
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
186
|
+
int ch;
|
187
|
+
if (z->c <= z->lb) return -1;
|
188
|
+
ch = z->p[z->c - 1];
|
189
|
+
if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)
|
190
|
+
return 1;
|
191
|
+
z->c--;
|
166
192
|
} while (repeat);
|
167
193
|
return 0;
|
168
194
|
}
|
169
195
|
|
170
|
-
int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
196
|
+
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
171
197
|
do {
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
198
|
+
int ch;
|
199
|
+
if (z->c >= z->l) return -1;
|
200
|
+
ch = z->p[z->c];
|
201
|
+
if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
|
202
|
+
return 1;
|
203
|
+
z->c++;
|
178
204
|
} while (repeat);
|
179
205
|
return 0;
|
180
206
|
}
|
181
207
|
|
182
|
-
int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
208
|
+
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) {
|
183
209
|
do {
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
210
|
+
int ch;
|
211
|
+
if (z->c <= z->lb) return -1;
|
212
|
+
ch = z->p[z->c - 1];
|
213
|
+
if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0))
|
214
|
+
return 1;
|
215
|
+
z->c--;
|
190
216
|
} while (repeat);
|
191
217
|
return 0;
|
192
218
|
}
|
193
219
|
|
194
|
-
int eq_s(struct SN_env * z, int s_size, const symbol * s) {
|
220
|
+
extern int eq_s(struct SN_env * z, int s_size, const symbol * s) {
|
195
221
|
if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0;
|
196
222
|
z->c += s_size; return 1;
|
197
223
|
}
|
198
224
|
|
199
|
-
int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
|
225
|
+
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) {
|
200
226
|
if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0;
|
201
227
|
z->c -= s_size; return 1;
|
202
228
|
}
|
203
229
|
|
204
|
-
int eq_v(struct SN_env * z, const symbol * p) {
|
230
|
+
extern int eq_v(struct SN_env * z, const symbol * p) {
|
205
231
|
return eq_s(z, SIZE(p), p);
|
206
232
|
}
|
207
233
|
|
208
|
-
int eq_v_b(struct SN_env * z, const symbol * p) {
|
234
|
+
extern int eq_v_b(struct SN_env * z, const symbol * p) {
|
209
235
|
return eq_s_b(z, SIZE(p), p);
|
210
236
|
}
|
211
237
|
|
212
|
-
int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
238
|
+
extern int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
213
239
|
|
214
240
|
int i = 0;
|
215
241
|
int j = v_size;
|
216
242
|
|
217
243
|
int c = z->c; int l = z->l;
|
218
|
-
symbol * q = z->p + c;
|
244
|
+
const symbol * q = z->p + c;
|
219
245
|
|
220
246
|
const struct among * w;
|
221
247
|
|
@@ -224,7 +250,7 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
|
224
250
|
|
225
251
|
int first_key_inspected = 0;
|
226
252
|
|
227
|
-
while(1) {
|
253
|
+
while (1) {
|
228
254
|
int k = i + ((j - i) >> 1);
|
229
255
|
int diff = 0;
|
230
256
|
int common = common_i < common_j ? common_i : common_j; /* smaller */
|
@@ -237,8 +263,13 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
|
237
263
|
common++;
|
238
264
|
}
|
239
265
|
}
|
240
|
-
if (diff < 0) {
|
241
|
-
|
266
|
+
if (diff < 0) {
|
267
|
+
j = k;
|
268
|
+
common_j = common;
|
269
|
+
} else {
|
270
|
+
i = k;
|
271
|
+
common_i = common;
|
272
|
+
}
|
242
273
|
if (j - i <= 1) {
|
243
274
|
if (i > 0) break; /* v->s has been inspected */
|
244
275
|
if (j == i) break; /* only one item in v */
|
@@ -251,7 +282,7 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
|
251
282
|
first_key_inspected = 1;
|
252
283
|
}
|
253
284
|
}
|
254
|
-
while(1) {
|
285
|
+
while (1) {
|
255
286
|
w = v + i;
|
256
287
|
if (common_i >= w->s_size) {
|
257
288
|
z->c = c + w->s_size;
|
@@ -269,13 +300,13 @@ int find_among(struct SN_env * z, const struct among * v, int v_size) {
|
|
269
300
|
|
270
301
|
/* find_among_b is for backwards processing. Same comments apply */
|
271
302
|
|
272
|
-
int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
303
|
+
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
273
304
|
|
274
305
|
int i = 0;
|
275
306
|
int j = v_size;
|
276
307
|
|
277
308
|
int c = z->c; int lb = z->lb;
|
278
|
-
symbol * q = z->p + c - 1;
|
309
|
+
const symbol * q = z->p + c - 1;
|
279
310
|
|
280
311
|
const struct among * w;
|
281
312
|
|
@@ -284,7 +315,7 @@ int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
|
284
315
|
|
285
316
|
int first_key_inspected = 0;
|
286
317
|
|
287
|
-
while(1) {
|
318
|
+
while (1) {
|
288
319
|
int k = i + ((j - i) >> 1);
|
289
320
|
int diff = 0;
|
290
321
|
int common = common_i < common_j ? common_i : common_j;
|
@@ -306,7 +337,7 @@ int find_among_b(struct SN_env * z, const struct among * v, int v_size) {
|
|
306
337
|
first_key_inspected = 1;
|
307
338
|
}
|
308
339
|
}
|
309
|
-
while(1) {
|
340
|
+
while (1) {
|
310
341
|
w = v + i;
|
311
342
|
if (common_i >= w->s_size) {
|
312
343
|
z->c = c - w->s_size;
|
@@ -345,7 +376,7 @@ static symbol * increase_size(symbol * p, int n) {
|
|
345
376
|
Returns 0 on success, -1 on error.
|
346
377
|
Also, frees z->p (and sets it to NULL) on error.
|
347
378
|
*/
|
348
|
-
int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
|
379
|
+
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr)
|
349
380
|
{
|
350
381
|
int adjustment;
|
351
382
|
int len;
|
@@ -367,11 +398,10 @@ int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol
|
|
367
398
|
z->l += adjustment;
|
368
399
|
if (z->c >= c_ket)
|
369
400
|
z->c += adjustment;
|
370
|
-
else
|
371
|
-
|
372
|
-
z->c = c_bra;
|
401
|
+
else if (z->c > c_bra)
|
402
|
+
z->c = c_bra;
|
373
403
|
}
|
374
|
-
|
404
|
+
if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol));
|
375
405
|
if (adjptr != NULL)
|
376
406
|
*adjptr = adjustment;
|
377
407
|
return 0;
|
@@ -394,20 +424,20 @@ static int slice_check(struct SN_env * z) {
|
|
394
424
|
return 0;
|
395
425
|
}
|
396
426
|
|
397
|
-
int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
|
427
|
+
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) {
|
398
428
|
if (slice_check(z)) return -1;
|
399
429
|
return replace_s(z, z->bra, z->ket, s_size, s, NULL);
|
400
430
|
}
|
401
431
|
|
402
|
-
int slice_from_v(struct SN_env * z, const symbol * p) {
|
432
|
+
extern int slice_from_v(struct SN_env * z, const symbol * p) {
|
403
433
|
return slice_from_s(z, SIZE(p), p);
|
404
434
|
}
|
405
435
|
|
406
|
-
int slice_del(struct SN_env * z) {
|
436
|
+
extern int slice_del(struct SN_env * z) {
|
407
437
|
return slice_from_s(z, 0, 0);
|
408
438
|
}
|
409
439
|
|
410
|
-
int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
|
440
|
+
extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) {
|
411
441
|
int adjustment;
|
412
442
|
if (replace_s(z, bra, ket, s_size, s, &adjustment))
|
413
443
|
return -1;
|
@@ -416,16 +446,11 @@ int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s)
|
|
416
446
|
return 0;
|
417
447
|
}
|
418
448
|
|
419
|
-
int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
|
420
|
-
|
421
|
-
if (replace_s(z, bra, ket, SIZE(p), p, &adjustment))
|
422
|
-
return -1;
|
423
|
-
if (bra <= z->bra) z->bra += adjustment;
|
424
|
-
if (bra <= z->ket) z->ket += adjustment;
|
425
|
-
return 0;
|
449
|
+
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) {
|
450
|
+
return insert_s(z, bra, ket, SIZE(p), p);
|
426
451
|
}
|
427
452
|
|
428
|
-
symbol * slice_to(struct SN_env * z, symbol * p) {
|
453
|
+
extern symbol * slice_to(struct SN_env * z, symbol * p) {
|
429
454
|
if (slice_check(z)) {
|
430
455
|
lose_s(p);
|
431
456
|
return NULL;
|
@@ -443,7 +468,7 @@ symbol * slice_to(struct SN_env * z, symbol * p) {
|
|
443
468
|
return p;
|
444
469
|
}
|
445
470
|
|
446
|
-
symbol * assign_to(struct SN_env * z, symbol * p) {
|
471
|
+
extern symbol * assign_to(struct SN_env * z, symbol * p) {
|
447
472
|
int len = z->l;
|
448
473
|
if (CAPACITY(p) < len) {
|
449
474
|
p = increase_size(p, len);
|
@@ -455,8 +480,18 @@ symbol * assign_to(struct SN_env * z, symbol * p) {
|
|
455
480
|
return p;
|
456
481
|
}
|
457
482
|
|
483
|
+
extern int len_utf8(const symbol * p) {
|
484
|
+
int size = SIZE(p);
|
485
|
+
int len = 0;
|
486
|
+
while (size--) {
|
487
|
+
symbol b = *p++;
|
488
|
+
if (b >= 0xC0 || b < 0x80) ++len;
|
489
|
+
}
|
490
|
+
return len;
|
491
|
+
}
|
492
|
+
|
458
493
|
#if 0
|
459
|
-
void debug(struct SN_env * z, int number, int line_count) {
|
494
|
+
extern void debug(struct SN_env * z, int number, int line_count) {
|
460
495
|
int i;
|
461
496
|
int limit = SIZE(z->p);
|
462
497
|
/*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/
|
@@ -774,6 +774,12 @@ static VALUE frb_ts_term_vectors(VALUE v) { return INT2FIX(execute_test(33)); }
|
|
774
774
|
static VALUE frb_ts_test(VALUE v) { return INT2FIX(execute_test(34)); }
|
775
775
|
static VALUE frb_ts_threading(VALUE v) { return INT2FIX(execute_test(35)); }
|
776
776
|
|
777
|
+
static VALUE frb_ts_posh(VALUE v) {
|
778
|
+
const char *posh = POSH_GetArchString();
|
779
|
+
printf("\n%s\n", posh);
|
780
|
+
return Qnil;
|
781
|
+
}
|
782
|
+
|
777
783
|
static VALUE frb_ts_run_all(VALUE v) {
|
778
784
|
int i, test_count;
|
779
785
|
int rv = 0;
|
@@ -845,6 +851,6 @@ void Init_Test(void) {
|
|
845
851
|
rb_define_singleton_method(mTest, "term_vectors", frb_ts_term_vectors, 0);
|
846
852
|
rb_define_singleton_method(mTest, "test", frb_ts_test, 0);
|
847
853
|
rb_define_singleton_method(mTest, "threading", frb_ts_threading, 0);
|
848
|
-
|
854
|
+
rb_define_singleton_method(mTest, "posh", frb_ts_posh, 0);
|
849
855
|
rb_define_singleton_method(mTest, "run_all", frb_ts_run_all, 0);
|
850
856
|
}
|
@@ -556,7 +556,6 @@ static void test_phrase_query(TestCase *tc, void *data)
|
|
556
556
|
check_to_s(tc, phq, field, "\"\"");
|
557
557
|
check_to_s(tc, phq, (FrtSymbol)NULL, "field:\"\"");
|
558
558
|
|
559
|
-
|
560
559
|
frt_phq_add_term(phq, "quick", 1);
|
561
560
|
frt_phq_add_term(phq, "brown", 1);
|
562
561
|
frt_phq_add_term(phq, "fox", 1);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.
|
4
|
+
version: 0.12.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-02-
|
11
|
+
date: 2022-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -152,12 +152,15 @@ files:
|
|
152
152
|
- ext/isomorfeus_ferret_ext/isomorfeus_ferret.h
|
153
153
|
- ext/isomorfeus_ferret_ext/libstemmer.c
|
154
154
|
- ext/isomorfeus_ferret_ext/libstemmer.h
|
155
|
-
- ext/isomorfeus_ferret_ext/q_parser.y
|
156
155
|
- ext/isomorfeus_ferret_ext/scanner.h
|
157
156
|
- ext/isomorfeus_ferret_ext/scanner.in
|
158
157
|
- ext/isomorfeus_ferret_ext/scanner.rl
|
159
158
|
- ext/isomorfeus_ferret_ext/scanner_mb.rl
|
160
159
|
- ext/isomorfeus_ferret_ext/scanner_utf8.rl
|
160
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c
|
161
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h
|
162
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c
|
163
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h
|
161
164
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c
|
162
165
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h
|
163
166
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c
|
@@ -172,6 +175,10 @@ files:
|
|
172
175
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h
|
173
176
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c
|
174
177
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h
|
178
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c
|
179
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h
|
180
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c
|
181
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h
|
175
182
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c
|
176
183
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h
|
177
184
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c
|
@@ -184,10 +191,20 @@ files:
|
|
184
191
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h
|
185
192
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c
|
186
193
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h
|
194
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c
|
195
|
+
- ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h
|
187
196
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c
|
188
197
|
- ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h
|
189
198
|
- ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c
|
190
199
|
- ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h
|
200
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c
|
201
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h
|
202
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c
|
203
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h
|
204
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c
|
205
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h
|
206
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c
|
207
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h
|
191
208
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c
|
192
209
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h
|
193
210
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c
|
@@ -200,10 +217,22 @@ files:
|
|
200
217
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_french.h
|
201
218
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_german.c
|
202
219
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_german.h
|
220
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c
|
221
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h
|
222
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c
|
223
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h
|
203
224
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c
|
204
225
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h
|
226
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c
|
227
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h
|
228
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c
|
229
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h
|
205
230
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c
|
206
231
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h
|
232
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c
|
233
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h
|
234
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c
|
235
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h
|
207
236
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c
|
208
237
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h
|
209
238
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c
|
@@ -214,12 +243,18 @@ files:
|
|
214
243
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h
|
215
244
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c
|
216
245
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h
|
246
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c
|
247
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h
|
217
248
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c
|
218
249
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h
|
219
250
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c
|
220
251
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h
|
252
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c
|
253
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h
|
221
254
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c
|
222
255
|
- ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h
|
256
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c
|
257
|
+
- ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h
|
223
258
|
- ext/isomorfeus_ferret_ext/stem_api.c
|
224
259
|
- ext/isomorfeus_ferret_ext/stem_api.h
|
225
260
|
- ext/isomorfeus_ferret_ext/stem_header.h
|
@@ -280,7 +315,7 @@ files:
|
|
280
315
|
- lib/isomorfeus/ferret/monitor.rb
|
281
316
|
- lib/isomorfeus/ferret/stdlib_patches.rb
|
282
317
|
- lib/isomorfeus/ferret/version.rb
|
283
|
-
homepage:
|
318
|
+
homepage: https://isomorfeus.com
|
284
319
|
licenses:
|
285
320
|
- MIT
|
286
321
|
metadata:
|