ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
@@ -3,24 +3,38 @@
|
|
3
3
|
|
4
4
|
#include "header.h"
|
5
5
|
|
6
|
+
#ifdef __cplusplus
|
7
|
+
extern "C" {
|
8
|
+
#endif
|
6
9
|
extern int german_UTF_8_stem(struct SN_env * z);
|
10
|
+
#ifdef __cplusplus
|
11
|
+
}
|
12
|
+
#endif
|
7
13
|
static int r_standard_suffix(struct SN_env * z);
|
8
14
|
static int r_R2(struct SN_env * z);
|
9
15
|
static int r_R1(struct SN_env * z);
|
10
16
|
static int r_mark_regions(struct SN_env * z);
|
11
17
|
static int r_postlude(struct SN_env * z);
|
12
18
|
static int r_prelude(struct SN_env * z);
|
19
|
+
#ifdef __cplusplus
|
20
|
+
extern "C" {
|
21
|
+
#endif
|
22
|
+
|
13
23
|
|
14
24
|
extern struct SN_env * german_UTF_8_create_env(void);
|
15
25
|
extern void german_UTF_8_close_env(struct SN_env * z);
|
16
26
|
|
17
|
-
static symbol s_0_1[1] = { 'U' };
|
18
|
-
static symbol s_0_2[1] = { 'Y' };
|
19
|
-
static symbol s_0_3[2] = { 0xC3, 0xA4 };
|
20
|
-
static symbol s_0_4[2] = { 0xC3, 0xB6 };
|
21
|
-
static symbol s_0_5[2] = { 0xC3, 0xBC };
|
22
27
|
|
23
|
-
|
28
|
+
#ifdef __cplusplus
|
29
|
+
}
|
30
|
+
#endif
|
31
|
+
static const symbol s_0_1[1] = { 'U' };
|
32
|
+
static const symbol s_0_2[1] = { 'Y' };
|
33
|
+
static const symbol s_0_3[2] = { 0xC3, 0xA4 };
|
34
|
+
static const symbol s_0_4[2] = { 0xC3, 0xB6 };
|
35
|
+
static const symbol s_0_5[2] = { 0xC3, 0xBC };
|
36
|
+
|
37
|
+
static const struct among a_0[6] =
|
24
38
|
{
|
25
39
|
/* 0 */ { 0, 0, -1, 6, 0},
|
26
40
|
/* 1 */ { 1, s_0_1, 0, 2, 0},
|
@@ -30,15 +44,15 @@ static struct among a_0[6] =
|
|
30
44
|
/* 5 */ { 2, s_0_5, 0, 5, 0}
|
31
45
|
};
|
32
46
|
|
33
|
-
static symbol s_1_0[1] = { 'e' };
|
34
|
-
static symbol s_1_1[2] = { 'e', 'm' };
|
35
|
-
static symbol s_1_2[2] = { 'e', 'n' };
|
36
|
-
static symbol s_1_3[3] = { 'e', 'r', 'n' };
|
37
|
-
static symbol s_1_4[2] = { 'e', 'r' };
|
38
|
-
static symbol s_1_5[1] = { 's' };
|
39
|
-
static symbol s_1_6[2] = { 'e', 's' };
|
47
|
+
static const symbol s_1_0[1] = { 'e' };
|
48
|
+
static const symbol s_1_1[2] = { 'e', 'm' };
|
49
|
+
static const symbol s_1_2[2] = { 'e', 'n' };
|
50
|
+
static const symbol s_1_3[3] = { 'e', 'r', 'n' };
|
51
|
+
static const symbol s_1_4[2] = { 'e', 'r' };
|
52
|
+
static const symbol s_1_5[1] = { 's' };
|
53
|
+
static const symbol s_1_6[2] = { 'e', 's' };
|
40
54
|
|
41
|
-
static struct among a_1[7] =
|
55
|
+
static const struct among a_1[7] =
|
42
56
|
{
|
43
57
|
/* 0 */ { 1, s_1_0, -1, 1, 0},
|
44
58
|
/* 1 */ { 2, s_1_1, -1, 1, 0},
|
@@ -49,12 +63,12 @@ static struct among a_1[7] =
|
|
49
63
|
/* 6 */ { 2, s_1_6, 5, 1, 0}
|
50
64
|
};
|
51
65
|
|
52
|
-
static symbol s_2_0[2] = { 'e', 'n' };
|
53
|
-
static symbol s_2_1[2] = { 'e', 'r' };
|
54
|
-
static symbol s_2_2[2] = { 's', 't' };
|
55
|
-
static symbol s_2_3[3] = { 'e', 's', 't' };
|
66
|
+
static const symbol s_2_0[2] = { 'e', 'n' };
|
67
|
+
static const symbol s_2_1[2] = { 'e', 'r' };
|
68
|
+
static const symbol s_2_2[2] = { 's', 't' };
|
69
|
+
static const symbol s_2_3[3] = { 'e', 's', 't' };
|
56
70
|
|
57
|
-
static struct among a_2[4] =
|
71
|
+
static const struct among a_2[4] =
|
58
72
|
{
|
59
73
|
/* 0 */ { 2, s_2_0, -1, 1, 0},
|
60
74
|
/* 1 */ { 2, s_2_1, -1, 1, 0},
|
@@ -62,25 +76,25 @@ static struct among a_2[4] =
|
|
62
76
|
/* 3 */ { 3, s_2_3, 2, 1, 0}
|
63
77
|
};
|
64
78
|
|
65
|
-
static symbol s_3_0[2] = { 'i', 'g' };
|
66
|
-
static symbol s_3_1[4] = { 'l', 'i', 'c', 'h' };
|
79
|
+
static const symbol s_3_0[2] = { 'i', 'g' };
|
80
|
+
static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' };
|
67
81
|
|
68
|
-
static struct among a_3[2] =
|
82
|
+
static const struct among a_3[2] =
|
69
83
|
{
|
70
84
|
/* 0 */ { 2, s_3_0, -1, 1, 0},
|
71
85
|
/* 1 */ { 4, s_3_1, -1, 1, 0}
|
72
86
|
};
|
73
87
|
|
74
|
-
static symbol s_4_0[3] = { 'e', 'n', 'd' };
|
75
|
-
static symbol s_4_1[2] = { 'i', 'g' };
|
76
|
-
static symbol s_4_2[3] = { 'u', 'n', 'g' };
|
77
|
-
static symbol s_4_3[4] = { 'l', 'i', 'c', 'h' };
|
78
|
-
static symbol s_4_4[4] = { 'i', 's', 'c', 'h' };
|
79
|
-
static symbol s_4_5[2] = { 'i', 'k' };
|
80
|
-
static symbol s_4_6[4] = { 'h', 'e', 'i', 't' };
|
81
|
-
static symbol s_4_7[4] = { 'k', 'e', 'i', 't' };
|
88
|
+
static const symbol s_4_0[3] = { 'e', 'n', 'd' };
|
89
|
+
static const symbol s_4_1[2] = { 'i', 'g' };
|
90
|
+
static const symbol s_4_2[3] = { 'u', 'n', 'g' };
|
91
|
+
static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' };
|
92
|
+
static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' };
|
93
|
+
static const symbol s_4_5[2] = { 'i', 'k' };
|
94
|
+
static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' };
|
95
|
+
static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' };
|
82
96
|
|
83
|
-
static struct among a_4[8] =
|
97
|
+
static const struct among a_4[8] =
|
84
98
|
{
|
85
99
|
/* 0 */ { 3, s_4_0, -1, 1, 0},
|
86
100
|
/* 1 */ { 2, s_4_1, -1, 2, 0},
|
@@ -92,95 +106,92 @@ static struct among a_4[8] =
|
|
92
106
|
/* 7 */ { 4, s_4_7, -1, 4, 0}
|
93
107
|
};
|
94
108
|
|
95
|
-
static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
|
109
|
+
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
|
96
110
|
|
97
|
-
static unsigned char g_s_ending[] = { 117, 30, 5 };
|
111
|
+
static const unsigned char g_s_ending[] = { 117, 30, 5 };
|
98
112
|
|
99
|
-
static unsigned char g_st_ending[] = { 117, 30, 4 };
|
113
|
+
static const unsigned char g_st_ending[] = { 117, 30, 4 };
|
100
114
|
|
101
|
-
static symbol s_0[] = { 0xC3, 0x9F };
|
102
|
-
static symbol s_1[] = { 's', 's' };
|
103
|
-
static symbol s_2[] = { 'u' };
|
104
|
-
static symbol s_3[] = { 'U' };
|
105
|
-
static symbol s_4[] = { 'y' };
|
106
|
-
static symbol s_5[] = { 'Y' };
|
107
|
-
static symbol s_6[] = { 'y' };
|
108
|
-
static symbol s_7[] = { 'u' };
|
109
|
-
static symbol s_8[] = { 'a' };
|
110
|
-
static symbol s_9[] = { 'o' };
|
111
|
-
static symbol s_10[] = { 'u' };
|
112
|
-
static symbol s_11[] = { 'i', 'g' };
|
113
|
-
static symbol s_12[] = { 'e' };
|
114
|
-
static symbol s_13[] = { 'e' };
|
115
|
-
static symbol s_14[] = { 'e', 'r' };
|
116
|
-
static symbol s_15[] = { 'e', 'n' };
|
115
|
+
static const symbol s_0[] = { 0xC3, 0x9F };
|
116
|
+
static const symbol s_1[] = { 's', 's' };
|
117
|
+
static const symbol s_2[] = { 'u' };
|
118
|
+
static const symbol s_3[] = { 'U' };
|
119
|
+
static const symbol s_4[] = { 'y' };
|
120
|
+
static const symbol s_5[] = { 'Y' };
|
121
|
+
static const symbol s_6[] = { 'y' };
|
122
|
+
static const symbol s_7[] = { 'u' };
|
123
|
+
static const symbol s_8[] = { 'a' };
|
124
|
+
static const symbol s_9[] = { 'o' };
|
125
|
+
static const symbol s_10[] = { 'u' };
|
126
|
+
static const symbol s_11[] = { 'i', 'g' };
|
127
|
+
static const symbol s_12[] = { 'e' };
|
128
|
+
static const symbol s_13[] = { 'e' };
|
129
|
+
static const symbol s_14[] = { 'e', 'r' };
|
130
|
+
static const symbol s_15[] = { 'e', 'n' };
|
117
131
|
|
118
132
|
static int r_prelude(struct SN_env * z) {
|
119
133
|
{ int c_test = z->c; /* test, line 30 */
|
120
134
|
while(1) { /* repeat, line 30 */
|
121
|
-
int
|
122
|
-
{ int
|
135
|
+
int c1 = z->c;
|
136
|
+
{ int c2 = z->c; /* or, line 33 */
|
123
137
|
z->bra = z->c; /* [, line 32 */
|
124
138
|
if (!(eq_s(z, 2, s_0))) goto lab2;
|
125
139
|
z->ket = z->c; /* ], line 32 */
|
126
|
-
{ int ret;
|
127
|
-
ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
|
140
|
+
{ int ret = slice_from_s(z, 2, s_1); /* <-, line 32 */
|
128
141
|
if (ret < 0) return ret;
|
129
142
|
}
|
130
143
|
goto lab1;
|
131
144
|
lab2:
|
132
|
-
z->c =
|
133
|
-
{ int
|
134
|
-
if (
|
135
|
-
z->c =
|
145
|
+
z->c = c2;
|
146
|
+
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
|
147
|
+
if (ret < 0) goto lab0;
|
148
|
+
z->c = ret; /* next, line 33 */
|
136
149
|
}
|
137
150
|
}
|
138
151
|
lab1:
|
139
152
|
continue;
|
140
153
|
lab0:
|
141
|
-
z->c =
|
154
|
+
z->c = c1;
|
142
155
|
break;
|
143
156
|
}
|
144
157
|
z->c = c_test;
|
145
158
|
}
|
146
159
|
while(1) { /* repeat, line 36 */
|
147
|
-
int
|
160
|
+
int c3 = z->c;
|
148
161
|
while(1) { /* goto, line 36 */
|
149
|
-
int
|
150
|
-
if (
|
162
|
+
int c4 = z->c;
|
163
|
+
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
|
151
164
|
z->bra = z->c; /* [, line 37 */
|
152
|
-
{ int
|
165
|
+
{ int c5 = z->c; /* or, line 37 */
|
153
166
|
if (!(eq_s(z, 1, s_2))) goto lab6;
|
154
167
|
z->ket = z->c; /* ], line 37 */
|
155
|
-
if (
|
156
|
-
{ int ret;
|
157
|
-
ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
|
168
|
+
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6;
|
169
|
+
{ int ret = slice_from_s(z, 1, s_3); /* <-, line 37 */
|
158
170
|
if (ret < 0) return ret;
|
159
171
|
}
|
160
172
|
goto lab5;
|
161
173
|
lab6:
|
162
|
-
z->c =
|
174
|
+
z->c = c5;
|
163
175
|
if (!(eq_s(z, 1, s_4))) goto lab4;
|
164
176
|
z->ket = z->c; /* ], line 38 */
|
165
|
-
if (
|
166
|
-
{ int ret;
|
167
|
-
ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
|
177
|
+
if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4;
|
178
|
+
{ int ret = slice_from_s(z, 1, s_5); /* <-, line 38 */
|
168
179
|
if (ret < 0) return ret;
|
169
180
|
}
|
170
181
|
}
|
171
182
|
lab5:
|
172
|
-
z->c =
|
183
|
+
z->c = c4;
|
173
184
|
break;
|
174
185
|
lab4:
|
175
|
-
z->c =
|
176
|
-
{ int
|
177
|
-
if (
|
178
|
-
z->c =
|
186
|
+
z->c = c4;
|
187
|
+
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
|
188
|
+
if (ret < 0) goto lab3;
|
189
|
+
z->c = ret; /* goto, line 36 */
|
179
190
|
}
|
180
191
|
}
|
181
192
|
continue;
|
182
193
|
lab3:
|
183
|
-
z->c =
|
194
|
+
z->c = c3;
|
184
195
|
break;
|
185
196
|
}
|
186
197
|
return 1;
|
@@ -190,53 +201,37 @@ static int r_mark_regions(struct SN_env * z) {
|
|
190
201
|
z->I[0] = z->l;
|
191
202
|
z->I[1] = z->l;
|
192
203
|
{ int c_test = z->c; /* test, line 47 */
|
193
|
-
{ int
|
194
|
-
if (
|
195
|
-
z->c =
|
204
|
+
{ int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
|
205
|
+
if (ret < 0) return 0;
|
206
|
+
z->c = ret; /* hop, line 47 */
|
196
207
|
}
|
197
208
|
z->I[2] = z->c; /* setmark x, line 47 */
|
198
209
|
z->c = c_test;
|
199
210
|
}
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
{ int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
205
|
-
if (c < 0) return 0;
|
206
|
-
z->c = c; /* gopast, line 49 */
|
207
|
-
}
|
211
|
+
{ /* gopast */ /* grouping v, line 49 */
|
212
|
+
int ret = out_grouping_U(z, g_v, 97, 252, 1);
|
213
|
+
if (ret < 0) return 0;
|
214
|
+
z->c += ret;
|
208
215
|
}
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
{ int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
214
|
-
if (c < 0) return 0;
|
215
|
-
z->c = c; /* gopast, line 49 */
|
216
|
-
}
|
216
|
+
{ /* gopast */ /* non v, line 49 */
|
217
|
+
int ret = in_grouping_U(z, g_v, 97, 252, 1);
|
218
|
+
if (ret < 0) return 0;
|
219
|
+
z->c += ret;
|
217
220
|
}
|
218
221
|
z->I[0] = z->c; /* setmark p1, line 49 */
|
219
222
|
/* try, line 50 */
|
220
|
-
if (!(z->I[0] < z->I[2])) goto
|
223
|
+
if (!(z->I[0] < z->I[2])) goto lab0;
|
221
224
|
z->I[0] = z->I[2];
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
{ int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
228
|
-
if (c < 0) return 0;
|
229
|
-
z->c = c; /* gopast, line 51 */
|
230
|
-
}
|
225
|
+
lab0:
|
226
|
+
{ /* gopast */ /* grouping v, line 51 */
|
227
|
+
int ret = out_grouping_U(z, g_v, 97, 252, 1);
|
228
|
+
if (ret < 0) return 0;
|
229
|
+
z->c += ret;
|
231
230
|
}
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
{ int c = skip_utf8(z->p, z->c, 0, z->l, 1);
|
237
|
-
if (c < 0) return 0;
|
238
|
-
z->c = c; /* gopast, line 51 */
|
239
|
-
}
|
231
|
+
{ /* gopast */ /* non v, line 51 */
|
232
|
+
int ret = in_grouping_U(z, g_v, 97, 252, 1);
|
233
|
+
if (ret < 0) return 0;
|
234
|
+
z->c += ret;
|
240
235
|
}
|
241
236
|
z->I[1] = z->c; /* setmark p2, line 51 */
|
242
237
|
return 1;
|
@@ -245,7 +240,7 @@ lab2:
|
|
245
240
|
static int r_postlude(struct SN_env * z) {
|
246
241
|
int among_var;
|
247
242
|
while(1) { /* repeat, line 55 */
|
248
|
-
int
|
243
|
+
int c1 = z->c;
|
249
244
|
z->bra = z->c; /* [, line 57 */
|
250
245
|
among_var = find_among(z, a_0, 6); /* substring, line 57 */
|
251
246
|
if (!(among_var)) goto lab0;
|
@@ -253,45 +248,40 @@ static int r_postlude(struct SN_env * z) {
|
|
253
248
|
switch(among_var) {
|
254
249
|
case 0: goto lab0;
|
255
250
|
case 1:
|
256
|
-
{ int ret;
|
257
|
-
ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
|
251
|
+
{ int ret = slice_from_s(z, 1, s_6); /* <-, line 58 */
|
258
252
|
if (ret < 0) return ret;
|
259
253
|
}
|
260
254
|
break;
|
261
255
|
case 2:
|
262
|
-
{ int ret;
|
263
|
-
ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
|
256
|
+
{ int ret = slice_from_s(z, 1, s_7); /* <-, line 59 */
|
264
257
|
if (ret < 0) return ret;
|
265
258
|
}
|
266
259
|
break;
|
267
260
|
case 3:
|
268
|
-
{ int ret;
|
269
|
-
ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
|
261
|
+
{ int ret = slice_from_s(z, 1, s_8); /* <-, line 60 */
|
270
262
|
if (ret < 0) return ret;
|
271
263
|
}
|
272
264
|
break;
|
273
265
|
case 4:
|
274
|
-
{ int ret;
|
275
|
-
ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
|
266
|
+
{ int ret = slice_from_s(z, 1, s_9); /* <-, line 61 */
|
276
267
|
if (ret < 0) return ret;
|
277
268
|
}
|
278
269
|
break;
|
279
270
|
case 5:
|
280
|
-
{ int ret;
|
281
|
-
ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
|
271
|
+
{ int ret = slice_from_s(z, 1, s_10); /* <-, line 62 */
|
282
272
|
if (ret < 0) return ret;
|
283
273
|
}
|
284
274
|
break;
|
285
275
|
case 6:
|
286
|
-
{ int
|
287
|
-
if (
|
288
|
-
z->c =
|
276
|
+
{ int ret = skip_utf8(z->p, z->c, 0, z->l, 1);
|
277
|
+
if (ret < 0) goto lab0;
|
278
|
+
z->c = ret; /* next, line 63 */
|
289
279
|
}
|
290
280
|
break;
|
291
281
|
}
|
292
282
|
continue;
|
293
283
|
lab0:
|
294
|
-
z->c =
|
284
|
+
z->c = c1;
|
295
285
|
break;
|
296
286
|
}
|
297
287
|
return 1;
|
@@ -309,8 +299,9 @@ static int r_R2(struct SN_env * z) {
|
|
309
299
|
|
310
300
|
static int r_standard_suffix(struct SN_env * z) {
|
311
301
|
int among_var;
|
312
|
-
{ int
|
302
|
+
{ int m1 = z->l - z->c; (void)m1; /* do, line 74 */
|
313
303
|
z->ket = z->c; /* [, line 75 */
|
304
|
+
if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0;
|
314
305
|
among_var = find_among_b(z, a_1, 7); /* substring, line 75 */
|
315
306
|
if (!(among_var)) goto lab0;
|
316
307
|
z->bra = z->c; /* ], line 75 */
|
@@ -321,24 +312,23 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
321
312
|
switch(among_var) {
|
322
313
|
case 0: goto lab0;
|
323
314
|
case 1:
|
324
|
-
{ int ret;
|
325
|
-
ret = slice_del(z); /* delete, line 77 */
|
315
|
+
{ int ret = slice_del(z); /* delete, line 77 */
|
326
316
|
if (ret < 0) return ret;
|
327
317
|
}
|
328
318
|
break;
|
329
319
|
case 2:
|
330
|
-
if (
|
331
|
-
{ int ret;
|
332
|
-
ret = slice_del(z); /* delete, line 80 */
|
320
|
+
if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0;
|
321
|
+
{ int ret = slice_del(z); /* delete, line 80 */
|
333
322
|
if (ret < 0) return ret;
|
334
323
|
}
|
335
324
|
break;
|
336
325
|
}
|
337
326
|
lab0:
|
338
|
-
z->c = z->l -
|
327
|
+
z->c = z->l - m1;
|
339
328
|
}
|
340
|
-
{ int
|
329
|
+
{ int m2 = z->l - z->c; (void)m2; /* do, line 84 */
|
341
330
|
z->ket = z->c; /* [, line 85 */
|
331
|
+
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1;
|
342
332
|
among_var = find_among_b(z, a_2, 4); /* substring, line 85 */
|
343
333
|
if (!(among_var)) goto lab1;
|
344
334
|
z->bra = z->c; /* ], line 85 */
|
@@ -349,28 +339,27 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
349
339
|
switch(among_var) {
|
350
340
|
case 0: goto lab1;
|
351
341
|
case 1:
|
352
|
-
{ int ret;
|
353
|
-
ret = slice_del(z); /* delete, line 87 */
|
342
|
+
{ int ret = slice_del(z); /* delete, line 87 */
|
354
343
|
if (ret < 0) return ret;
|
355
344
|
}
|
356
345
|
break;
|
357
346
|
case 2:
|
358
|
-
if (
|
359
|
-
{ int
|
360
|
-
if (
|
361
|
-
z->c =
|
347
|
+
if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab1;
|
348
|
+
{ int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3);
|
349
|
+
if (ret < 0) goto lab1;
|
350
|
+
z->c = ret; /* hop, line 90 */
|
362
351
|
}
|
363
|
-
{ int ret;
|
364
|
-
ret = slice_del(z); /* delete, line 90 */
|
352
|
+
{ int ret = slice_del(z); /* delete, line 90 */
|
365
353
|
if (ret < 0) return ret;
|
366
354
|
}
|
367
355
|
break;
|
368
356
|
}
|
369
357
|
lab1:
|
370
|
-
z->c = z->l -
|
358
|
+
z->c = z->l - m2;
|
371
359
|
}
|
372
|
-
{ int
|
360
|
+
{ int m3 = z->l - z->c; (void)m3; /* do, line 94 */
|
373
361
|
z->ket = z->c; /* [, line 95 */
|
362
|
+
if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2;
|
374
363
|
among_var = find_among_b(z, a_4, 8); /* substring, line 95 */
|
375
364
|
if (!(among_var)) goto lab2;
|
376
365
|
z->bra = z->c; /* ], line 95 */
|
@@ -381,26 +370,24 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
381
370
|
switch(among_var) {
|
382
371
|
case 0: goto lab2;
|
383
372
|
case 1:
|
384
|
-
{ int ret;
|
385
|
-
ret = slice_del(z); /* delete, line 97 */
|
373
|
+
{ int ret = slice_del(z); /* delete, line 97 */
|
386
374
|
if (ret < 0) return ret;
|
387
375
|
}
|
388
|
-
{ int
|
376
|
+
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 98 */
|
389
377
|
z->ket = z->c; /* [, line 98 */
|
390
|
-
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l -
|
378
|
+
if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab3; }
|
391
379
|
z->bra = z->c; /* ], line 98 */
|
392
|
-
{ int
|
380
|
+
{ int m4 = z->l - z->c; (void)m4; /* not, line 98 */
|
393
381
|
if (!(eq_s_b(z, 1, s_12))) goto lab4;
|
394
|
-
{ z->c = z->l -
|
382
|
+
{ z->c = z->l - m_keep; goto lab3; }
|
395
383
|
lab4:
|
396
|
-
z->c = z->l -
|
384
|
+
z->c = z->l - m4;
|
397
385
|
}
|
398
386
|
{ int ret = r_R2(z);
|
399
|
-
if (ret == 0) { z->c = z->l -
|
387
|
+
if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 98 */
|
400
388
|
if (ret < 0) return ret;
|
401
389
|
}
|
402
|
-
{ int ret;
|
403
|
-
ret = slice_del(z); /* delete, line 98 */
|
390
|
+
{ int ret = slice_del(z); /* delete, line 98 */
|
404
391
|
if (ret < 0) return ret;
|
405
392
|
}
|
406
393
|
lab3:
|
@@ -408,39 +395,36 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
408
395
|
}
|
409
396
|
break;
|
410
397
|
case 2:
|
411
|
-
{ int
|
398
|
+
{ int m5 = z->l - z->c; (void)m5; /* not, line 101 */
|
412
399
|
if (!(eq_s_b(z, 1, s_13))) goto lab5;
|
413
400
|
goto lab2;
|
414
401
|
lab5:
|
415
|
-
z->c = z->l -
|
402
|
+
z->c = z->l - m5;
|
416
403
|
}
|
417
|
-
{ int ret;
|
418
|
-
ret = slice_del(z); /* delete, line 101 */
|
404
|
+
{ int ret = slice_del(z); /* delete, line 101 */
|
419
405
|
if (ret < 0) return ret;
|
420
406
|
}
|
421
407
|
break;
|
422
408
|
case 3:
|
423
|
-
{ int ret;
|
424
|
-
ret = slice_del(z); /* delete, line 104 */
|
409
|
+
{ int ret = slice_del(z); /* delete, line 104 */
|
425
410
|
if (ret < 0) return ret;
|
426
411
|
}
|
427
|
-
{ int
|
412
|
+
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 105 */
|
428
413
|
z->ket = z->c; /* [, line 106 */
|
429
|
-
{ int
|
414
|
+
{ int m6 = z->l - z->c; (void)m6; /* or, line 106 */
|
430
415
|
if (!(eq_s_b(z, 2, s_14))) goto lab8;
|
431
416
|
goto lab7;
|
432
417
|
lab8:
|
433
|
-
z->c = z->l -
|
434
|
-
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l -
|
418
|
+
z->c = z->l - m6;
|
419
|
+
if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab6; }
|
435
420
|
}
|
436
421
|
lab7:
|
437
422
|
z->bra = z->c; /* ], line 106 */
|
438
423
|
{ int ret = r_R1(z);
|
439
|
-
if (ret == 0) { z->c = z->l -
|
424
|
+
if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R1, line 106 */
|
440
425
|
if (ret < 0) return ret;
|
441
426
|
}
|
442
|
-
{ int ret;
|
443
|
-
ret = slice_del(z); /* delete, line 106 */
|
427
|
+
{ int ret = slice_del(z); /* delete, line 106 */
|
444
428
|
if (ret < 0) return ret;
|
445
429
|
}
|
446
430
|
lab6:
|
@@ -448,24 +432,23 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
448
432
|
}
|
449
433
|
break;
|
450
434
|
case 4:
|
451
|
-
{ int ret;
|
452
|
-
ret = slice_del(z); /* delete, line 110 */
|
435
|
+
{ int ret = slice_del(z); /* delete, line 110 */
|
453
436
|
if (ret < 0) return ret;
|
454
437
|
}
|
455
|
-
{ int
|
438
|
+
{ int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */
|
456
439
|
z->ket = z->c; /* [, line 112 */
|
440
|
+
if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab9; }
|
457
441
|
among_var = find_among_b(z, a_3, 2); /* substring, line 112 */
|
458
|
-
if (!(among_var)) { z->c = z->l -
|
442
|
+
if (!(among_var)) { z->c = z->l - m_keep; goto lab9; }
|
459
443
|
z->bra = z->c; /* ], line 112 */
|
460
444
|
{ int ret = r_R2(z);
|
461
|
-
if (ret == 0) { z->c = z->l -
|
445
|
+
if (ret == 0) { z->c = z->l - m_keep; goto lab9; } /* call R2, line 112 */
|
462
446
|
if (ret < 0) return ret;
|
463
447
|
}
|
464
448
|
switch(among_var) {
|
465
|
-
case 0: { z->c = z->l -
|
449
|
+
case 0: { z->c = z->l - m_keep; goto lab9; }
|
466
450
|
case 1:
|
467
|
-
{ int ret;
|
468
|
-
ret = slice_del(z); /* delete, line 114 */
|
451
|
+
{ int ret = slice_del(z); /* delete, line 114 */
|
469
452
|
if (ret < 0) return ret;
|
470
453
|
}
|
471
454
|
break;
|
@@ -476,51 +459,51 @@ static int r_standard_suffix(struct SN_env * z) {
|
|
476
459
|
break;
|
477
460
|
}
|
478
461
|
lab2:
|
479
|
-
z->c = z->l -
|
462
|
+
z->c = z->l - m3;
|
480
463
|
}
|
481
464
|
return 1;
|
482
465
|
}
|
483
466
|
|
484
467
|
extern int german_UTF_8_stem(struct SN_env * z) {
|
485
|
-
{ int
|
468
|
+
{ int c1 = z->c; /* do, line 125 */
|
486
469
|
{ int ret = r_prelude(z);
|
487
470
|
if (ret == 0) goto lab0; /* call prelude, line 125 */
|
488
471
|
if (ret < 0) return ret;
|
489
472
|
}
|
490
473
|
lab0:
|
491
|
-
z->c =
|
474
|
+
z->c = c1;
|
492
475
|
}
|
493
|
-
{ int
|
476
|
+
{ int c2 = z->c; /* do, line 126 */
|
494
477
|
{ int ret = r_mark_regions(z);
|
495
478
|
if (ret == 0) goto lab1; /* call mark_regions, line 126 */
|
496
479
|
if (ret < 0) return ret;
|
497
480
|
}
|
498
481
|
lab1:
|
499
|
-
z->c =
|
482
|
+
z->c = c2;
|
500
483
|
}
|
501
484
|
z->lb = z->c; z->c = z->l; /* backwards, line 127 */
|
502
485
|
|
503
|
-
{ int
|
486
|
+
{ int m3 = z->l - z->c; (void)m3; /* do, line 128 */
|
504
487
|
{ int ret = r_standard_suffix(z);
|
505
488
|
if (ret == 0) goto lab2; /* call standard_suffix, line 128 */
|
506
489
|
if (ret < 0) return ret;
|
507
490
|
}
|
508
491
|
lab2:
|
509
|
-
z->c = z->l -
|
492
|
+
z->c = z->l - m3;
|
510
493
|
}
|
511
494
|
z->c = z->lb;
|
512
|
-
{ int
|
495
|
+
{ int c4 = z->c; /* do, line 129 */
|
513
496
|
{ int ret = r_postlude(z);
|
514
497
|
if (ret == 0) goto lab3; /* call postlude, line 129 */
|
515
498
|
if (ret < 0) return ret;
|
516
499
|
}
|
517
500
|
lab3:
|
518
|
-
z->c =
|
501
|
+
z->c = c4;
|
519
502
|
}
|
520
503
|
return 1;
|
521
504
|
}
|
522
505
|
|
523
506
|
extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); }
|
524
507
|
|
525
|
-
extern void german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z); }
|
508
|
+
extern void german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); }
|
526
509
|
|