ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -1,307 +0,0 @@
1
-
2
- /* This file was generated automatically by the Snowball to ANSI C compiler */
3
-
4
- #include "header.h"
5
-
6
- extern int swedish_ISO_8859_1_stem(struct SN_env * z);
7
- static int r_other_suffix(struct SN_env * z);
8
- static int r_consonant_pair(struct SN_env * z);
9
- static int r_main_suffix(struct SN_env * z);
10
- static int r_mark_regions(struct SN_env * z);
11
-
12
- extern struct SN_env * swedish_ISO_8859_1_create_env(void);
13
- extern void swedish_ISO_8859_1_close_env(struct SN_env * z);
14
-
15
- static symbol s_0_0[1] = { 'a' };
16
- static symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
17
- static symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
18
- static symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
19
- static symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
20
- static symbol s_0_5[2] = { 'a', 'd' };
21
- static symbol s_0_6[1] = { 'e' };
22
- static symbol s_0_7[3] = { 'a', 'd', 'e' };
23
- static symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
24
- static symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
25
- static symbol s_0_10[3] = { 'a', 'r', 'e' };
26
- static symbol s_0_11[4] = { 'a', 's', 't', 'e' };
27
- static symbol s_0_12[2] = { 'e', 'n' };
28
- static symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
29
- static symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
30
- static symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
31
- static symbol s_0_16[3] = { 'e', 'r', 'n' };
32
- static symbol s_0_17[2] = { 'a', 'r' };
33
- static symbol s_0_18[2] = { 'e', 'r' };
34
- static symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
35
- static symbol s_0_20[2] = { 'o', 'r' };
36
- static symbol s_0_21[1] = { 's' };
37
- static symbol s_0_22[2] = { 'a', 's' };
38
- static symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
39
- static symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
40
- static symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
41
- static symbol s_0_26[2] = { 'e', 's' };
42
- static symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
43
- static symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
44
- static symbol s_0_29[3] = { 'e', 'n', 's' };
45
- static symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
46
- static symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
47
- static symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
48
- static symbol s_0_33[2] = { 'a', 't' };
49
- static symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
50
- static symbol s_0_35[3] = { 'h', 'e', 't' };
51
- static symbol s_0_36[3] = { 'a', 's', 't' };
52
-
53
- static struct among a_0[37] =
54
- {
55
- /* 0 */ { 1, s_0_0, -1, 1, 0},
56
- /* 1 */ { 4, s_0_1, 0, 1, 0},
57
- /* 2 */ { 4, s_0_2, 0, 1, 0},
58
- /* 3 */ { 7, s_0_3, 2, 1, 0},
59
- /* 4 */ { 4, s_0_4, 0, 1, 0},
60
- /* 5 */ { 2, s_0_5, -1, 1, 0},
61
- /* 6 */ { 1, s_0_6, -1, 1, 0},
62
- /* 7 */ { 3, s_0_7, 6, 1, 0},
63
- /* 8 */ { 4, s_0_8, 6, 1, 0},
64
- /* 9 */ { 4, s_0_9, 6, 1, 0},
65
- /* 10 */ { 3, s_0_10, 6, 1, 0},
66
- /* 11 */ { 4, s_0_11, 6, 1, 0},
67
- /* 12 */ { 2, s_0_12, -1, 1, 0},
68
- /* 13 */ { 5, s_0_13, 12, 1, 0},
69
- /* 14 */ { 4, s_0_14, 12, 1, 0},
70
- /* 15 */ { 5, s_0_15, 12, 1, 0},
71
- /* 16 */ { 3, s_0_16, -1, 1, 0},
72
- /* 17 */ { 2, s_0_17, -1, 1, 0},
73
- /* 18 */ { 2, s_0_18, -1, 1, 0},
74
- /* 19 */ { 5, s_0_19, 18, 1, 0},
75
- /* 20 */ { 2, s_0_20, -1, 1, 0},
76
- /* 21 */ { 1, s_0_21, -1, 2, 0},
77
- /* 22 */ { 2, s_0_22, 21, 1, 0},
78
- /* 23 */ { 5, s_0_23, 22, 1, 0},
79
- /* 24 */ { 5, s_0_24, 22, 1, 0},
80
- /* 25 */ { 5, s_0_25, 22, 1, 0},
81
- /* 26 */ { 2, s_0_26, 21, 1, 0},
82
- /* 27 */ { 4, s_0_27, 26, 1, 0},
83
- /* 28 */ { 5, s_0_28, 26, 1, 0},
84
- /* 29 */ { 3, s_0_29, 21, 1, 0},
85
- /* 30 */ { 5, s_0_30, 29, 1, 0},
86
- /* 31 */ { 6, s_0_31, 29, 1, 0},
87
- /* 32 */ { 4, s_0_32, 21, 1, 0},
88
- /* 33 */ { 2, s_0_33, -1, 1, 0},
89
- /* 34 */ { 5, s_0_34, -1, 1, 0},
90
- /* 35 */ { 3, s_0_35, -1, 1, 0},
91
- /* 36 */ { 3, s_0_36, -1, 1, 0}
92
- };
93
-
94
- static symbol s_1_0[2] = { 'd', 'd' };
95
- static symbol s_1_1[2] = { 'g', 'd' };
96
- static symbol s_1_2[2] = { 'n', 'n' };
97
- static symbol s_1_3[2] = { 'd', 't' };
98
- static symbol s_1_4[2] = { 'g', 't' };
99
- static symbol s_1_5[2] = { 'k', 't' };
100
- static symbol s_1_6[2] = { 't', 't' };
101
-
102
- static struct among a_1[7] =
103
- {
104
- /* 0 */ { 2, s_1_0, -1, -1, 0},
105
- /* 1 */ { 2, s_1_1, -1, -1, 0},
106
- /* 2 */ { 2, s_1_2, -1, -1, 0},
107
- /* 3 */ { 2, s_1_3, -1, -1, 0},
108
- /* 4 */ { 2, s_1_4, -1, -1, 0},
109
- /* 5 */ { 2, s_1_5, -1, -1, 0},
110
- /* 6 */ { 2, s_1_6, -1, -1, 0}
111
- };
112
-
113
- static symbol s_2_0[2] = { 'i', 'g' };
114
- static symbol s_2_1[3] = { 'l', 'i', 'g' };
115
- static symbol s_2_2[3] = { 'e', 'l', 's' };
116
- static symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
117
- static symbol s_2_4[4] = { 'l', 0xF6, 's', 't' };
118
-
119
- static struct among a_2[5] =
120
- {
121
- /* 0 */ { 2, s_2_0, -1, 1, 0},
122
- /* 1 */ { 3, s_2_1, 0, 1, 0},
123
- /* 2 */ { 3, s_2_2, -1, 1, 0},
124
- /* 3 */ { 5, s_2_3, -1, 3, 0},
125
- /* 4 */ { 4, s_2_4, -1, 2, 0}
126
- };
127
-
128
- static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
129
-
130
- static unsigned char g_s_ending[] = { 119, 127, 149 };
131
-
132
- static symbol s_0[] = { 'l', 0xF6, 's' };
133
- static symbol s_1[] = { 'f', 'u', 'l', 'l' };
134
-
135
- static int r_mark_regions(struct SN_env * z) {
136
- z->I[0] = z->l;
137
- { int c_test = z->c; /* test, line 29 */
138
- { int c = z->c + 3;
139
- if (0 > c || c > z->l) return 0;
140
- z->c = c; /* hop, line 29 */
141
- }
142
- z->I[1] = z->c; /* setmark x, line 29 */
143
- z->c = c_test;
144
- }
145
- while(1) { /* goto, line 30 */
146
- int c = z->c;
147
- if (!(in_grouping(z, g_v, 97, 246))) goto lab0;
148
- z->c = c;
149
- break;
150
- lab0:
151
- z->c = c;
152
- if (z->c >= z->l) return 0;
153
- z->c++; /* goto, line 30 */
154
- }
155
- while(1) { /* gopast, line 30 */
156
- if (!(out_grouping(z, g_v, 97, 246))) goto lab1;
157
- break;
158
- lab1:
159
- if (z->c >= z->l) return 0;
160
- z->c++; /* gopast, line 30 */
161
- }
162
- z->I[0] = z->c; /* setmark p1, line 30 */
163
- /* try, line 31 */
164
- if (!(z->I[0] < z->I[1])) goto lab2;
165
- z->I[0] = z->I[1];
166
- lab2:
167
- return 1;
168
- }
169
-
170
- static int r_main_suffix(struct SN_env * z) {
171
- int among_var;
172
- { int m3; /* setlimit, line 37 */
173
- int m = z->l - z->c; (void) m;
174
- if (z->c < z->I[0]) return 0;
175
- z->c = z->I[0]; /* tomark, line 37 */
176
- m3 = z->lb; z->lb = z->c;
177
- z->c = z->l - m;
178
- z->ket = z->c; /* [, line 37 */
179
- among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
180
- if (!(among_var)) { z->lb = m3; return 0; }
181
- z->bra = z->c; /* ], line 37 */
182
- z->lb = m3;
183
- }
184
- switch(among_var) {
185
- case 0: return 0;
186
- case 1:
187
- { int ret;
188
- ret = slice_del(z); /* delete, line 44 */
189
- if (ret < 0) return ret;
190
- }
191
- break;
192
- case 2:
193
- if (!(in_grouping_b(z, g_s_ending, 98, 121))) return 0;
194
- { int ret;
195
- ret = slice_del(z); /* delete, line 46 */
196
- if (ret < 0) return ret;
197
- }
198
- break;
199
- }
200
- return 1;
201
- }
202
-
203
- static int r_consonant_pair(struct SN_env * z) {
204
- { int m3; /* setlimit, line 50 */
205
- int m = z->l - z->c; (void) m;
206
- if (z->c < z->I[0]) return 0;
207
- z->c = z->I[0]; /* tomark, line 50 */
208
- m3 = z->lb; z->lb = z->c;
209
- z->c = z->l - m;
210
- { int m = z->l - z->c; (void) m; /* and, line 52 */
211
- if (!(find_among_b(z, a_1, 7))) { z->lb = m3; return 0; } /* among, line 51 */
212
- z->c = z->l - m;
213
- z->ket = z->c; /* [, line 52 */
214
- if (z->c <= z->lb) { z->lb = m3; return 0; }
215
- z->c--; /* next, line 52 */
216
- z->bra = z->c; /* ], line 52 */
217
- { int ret;
218
- ret = slice_del(z); /* delete, line 52 */
219
- if (ret < 0) return ret;
220
- }
221
- }
222
- z->lb = m3;
223
- }
224
- return 1;
225
- }
226
-
227
- static int r_other_suffix(struct SN_env * z) {
228
- int among_var;
229
- { int m3; /* setlimit, line 55 */
230
- int m = z->l - z->c; (void) m;
231
- if (z->c < z->I[0]) return 0;
232
- z->c = z->I[0]; /* tomark, line 55 */
233
- m3 = z->lb; z->lb = z->c;
234
- z->c = z->l - m;
235
- z->ket = z->c; /* [, line 56 */
236
- among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
237
- if (!(among_var)) { z->lb = m3; return 0; }
238
- z->bra = z->c; /* ], line 56 */
239
- switch(among_var) {
240
- case 0: { z->lb = m3; return 0; }
241
- case 1:
242
- { int ret;
243
- ret = slice_del(z); /* delete, line 57 */
244
- if (ret < 0) return ret;
245
- }
246
- break;
247
- case 2:
248
- { int ret;
249
- ret = slice_from_s(z, 3, s_0); /* <-, line 58 */
250
- if (ret < 0) return ret;
251
- }
252
- break;
253
- case 3:
254
- { int ret;
255
- ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
256
- if (ret < 0) return ret;
257
- }
258
- break;
259
- }
260
- z->lb = m3;
261
- }
262
- return 1;
263
- }
264
-
265
- extern int swedish_ISO_8859_1_stem(struct SN_env * z) {
266
- { int c = z->c; /* do, line 66 */
267
- { int ret = r_mark_regions(z);
268
- if (ret == 0) goto lab0; /* call mark_regions, line 66 */
269
- if (ret < 0) return ret;
270
- }
271
- lab0:
272
- z->c = c;
273
- }
274
- z->lb = z->c; z->c = z->l; /* backwards, line 67 */
275
-
276
- { int m = z->l - z->c; (void) m; /* do, line 68 */
277
- { int ret = r_main_suffix(z);
278
- if (ret == 0) goto lab1; /* call main_suffix, line 68 */
279
- if (ret < 0) return ret;
280
- }
281
- lab1:
282
- z->c = z->l - m;
283
- }
284
- { int m = z->l - z->c; (void) m; /* do, line 69 */
285
- { int ret = r_consonant_pair(z);
286
- if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
287
- if (ret < 0) return ret;
288
- }
289
- lab2:
290
- z->c = z->l - m;
291
- }
292
- { int m = z->l - z->c; (void) m; /* do, line 70 */
293
- { int ret = r_other_suffix(z);
294
- if (ret == 0) goto lab3; /* call other_suffix, line 70 */
295
- if (ret < 0) return ret;
296
- }
297
- lab3:
298
- z->c = z->l - m;
299
- }
300
- z->c = z->lb;
301
- return 1;
302
- }
303
-
304
- extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); }
305
-
306
- extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z); }
307
-
@@ -1,344 +0,0 @@
1
-
2
- /* This file was generated automatically by the Snowball to ANSI C compiler */
3
-
4
- #include "header.h"
5
-
6
- extern int danish_UTF_8_stem(struct SN_env * z);
7
- static int r_undouble(struct SN_env * z);
8
- static int r_other_suffix(struct SN_env * z);
9
- static int r_consonant_pair(struct SN_env * z);
10
- static int r_main_suffix(struct SN_env * z);
11
- static int r_mark_regions(struct SN_env * z);
12
-
13
- extern struct SN_env * danish_UTF_8_create_env(void);
14
- extern void danish_UTF_8_close_env(struct SN_env * z);
15
-
16
- static symbol s_0_0[3] = { 'h', 'e', 'd' };
17
- static symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
18
- static symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
19
- static symbol s_0_3[1] = { 'e' };
20
- static symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
21
- static symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
22
- static symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
23
- static symbol s_0_7[3] = { 'e', 'n', 'e' };
24
- static symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
25
- static symbol s_0_9[3] = { 'e', 'r', 'e' };
26
- static symbol s_0_10[2] = { 'e', 'n' };
27
- static symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
28
- static symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
29
- static symbol s_0_13[2] = { 'e', 'r' };
30
- static symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
31
- static symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
32
- static symbol s_0_16[1] = { 's' };
33
- static symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
34
- static symbol s_0_18[2] = { 'e', 's' };
35
- static symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
36
- static symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
37
- static symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
38
- static symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
39
- static symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
40
- static symbol s_0_24[3] = { 'e', 'n', 's' };
41
- static symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
42
- static symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
43
- static symbol s_0_27[3] = { 'e', 'r', 's' };
44
- static symbol s_0_28[3] = { 'e', 't', 's' };
45
- static symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
46
- static symbol s_0_30[2] = { 'e', 't' };
47
- static symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
48
-
49
- static struct among a_0[32] =
50
- {
51
- /* 0 */ { 3, s_0_0, -1, 1, 0},
52
- /* 1 */ { 5, s_0_1, 0, 1, 0},
53
- /* 2 */ { 4, s_0_2, -1, 1, 0},
54
- /* 3 */ { 1, s_0_3, -1, 1, 0},
55
- /* 4 */ { 5, s_0_4, 3, 1, 0},
56
- /* 5 */ { 4, s_0_5, 3, 1, 0},
57
- /* 6 */ { 6, s_0_6, 5, 1, 0},
58
- /* 7 */ { 3, s_0_7, 3, 1, 0},
59
- /* 8 */ { 4, s_0_8, 3, 1, 0},
60
- /* 9 */ { 3, s_0_9, 3, 1, 0},
61
- /* 10 */ { 2, s_0_10, -1, 1, 0},
62
- /* 11 */ { 5, s_0_11, 10, 1, 0},
63
- /* 12 */ { 4, s_0_12, 10, 1, 0},
64
- /* 13 */ { 2, s_0_13, -1, 1, 0},
65
- /* 14 */ { 5, s_0_14, 13, 1, 0},
66
- /* 15 */ { 4, s_0_15, 13, 1, 0},
67
- /* 16 */ { 1, s_0_16, -1, 2, 0},
68
- /* 17 */ { 4, s_0_17, 16, 1, 0},
69
- /* 18 */ { 2, s_0_18, 16, 1, 0},
70
- /* 19 */ { 5, s_0_19, 18, 1, 0},
71
- /* 20 */ { 7, s_0_20, 19, 1, 0},
72
- /* 21 */ { 4, s_0_21, 18, 1, 0},
73
- /* 22 */ { 5, s_0_22, 18, 1, 0},
74
- /* 23 */ { 4, s_0_23, 18, 1, 0},
75
- /* 24 */ { 3, s_0_24, 16, 1, 0},
76
- /* 25 */ { 6, s_0_25, 24, 1, 0},
77
- /* 26 */ { 5, s_0_26, 24, 1, 0},
78
- /* 27 */ { 3, s_0_27, 16, 1, 0},
79
- /* 28 */ { 3, s_0_28, 16, 1, 0},
80
- /* 29 */ { 5, s_0_29, 28, 1, 0},
81
- /* 30 */ { 2, s_0_30, -1, 1, 0},
82
- /* 31 */ { 4, s_0_31, 30, 1, 0}
83
- };
84
-
85
- static symbol s_1_0[2] = { 'g', 'd' };
86
- static symbol s_1_1[2] = { 'd', 't' };
87
- static symbol s_1_2[2] = { 'g', 't' };
88
- static symbol s_1_3[2] = { 'k', 't' };
89
-
90
- static struct among a_1[4] =
91
- {
92
- /* 0 */ { 2, s_1_0, -1, -1, 0},
93
- /* 1 */ { 2, s_1_1, -1, -1, 0},
94
- /* 2 */ { 2, s_1_2, -1, -1, 0},
95
- /* 3 */ { 2, s_1_3, -1, -1, 0}
96
- };
97
-
98
- static symbol s_2_0[2] = { 'i', 'g' };
99
- static symbol s_2_1[3] = { 'l', 'i', 'g' };
100
- static symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
101
- static symbol s_2_3[3] = { 'e', 'l', 's' };
102
- static symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' };
103
-
104
- static struct among a_2[5] =
105
- {
106
- /* 0 */ { 2, s_2_0, -1, 1, 0},
107
- /* 1 */ { 3, s_2_1, 0, 1, 0},
108
- /* 2 */ { 4, s_2_2, 1, 1, 0},
109
- /* 3 */ { 3, s_2_3, -1, 1, 0},
110
- /* 4 */ { 5, s_2_4, -1, 2, 0}
111
- };
112
-
113
- static unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
114
-
115
- static unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
116
-
117
- static symbol s_0[] = { 's', 't' };
118
- static symbol s_1[] = { 'i', 'g' };
119
- static symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
120
-
121
- static int r_mark_regions(struct SN_env * z) {
122
- z->I[0] = z->l;
123
- { int c_test = z->c; /* test, line 33 */
124
- { int c = skip_utf8(z->p, z->c, 0, z->l, + 3);
125
- if (c < 0) return 0;
126
- z->c = c; /* hop, line 33 */
127
- }
128
- z->I[1] = z->c; /* setmark x, line 33 */
129
- z->c = c_test;
130
- }
131
- while(1) { /* goto, line 34 */
132
- int c = z->c;
133
- if (!(in_grouping_U(z, g_v, 97, 248))) goto lab0;
134
- z->c = c;
135
- break;
136
- lab0:
137
- z->c = c;
138
- { int c = skip_utf8(z->p, z->c, 0, z->l, 1);
139
- if (c < 0) return 0;
140
- z->c = c; /* goto, line 34 */
141
- }
142
- }
143
- while(1) { /* gopast, line 34 */
144
- if (!(out_grouping_U(z, g_v, 97, 248))) goto lab1;
145
- break;
146
- lab1:
147
- { int c = skip_utf8(z->p, z->c, 0, z->l, 1);
148
- if (c < 0) return 0;
149
- z->c = c; /* gopast, line 34 */
150
- }
151
- }
152
- z->I[0] = z->c; /* setmark p1, line 34 */
153
- /* try, line 35 */
154
- if (!(z->I[0] < z->I[1])) goto lab2;
155
- z->I[0] = z->I[1];
156
- lab2:
157
- return 1;
158
- }
159
-
160
- static int r_main_suffix(struct SN_env * z) {
161
- int among_var;
162
- { int m3; /* setlimit, line 41 */
163
- int m = z->l - z->c; (void) m;
164
- if (z->c < z->I[0]) return 0;
165
- z->c = z->I[0]; /* tomark, line 41 */
166
- m3 = z->lb; z->lb = z->c;
167
- z->c = z->l - m;
168
- z->ket = z->c; /* [, line 41 */
169
- among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
170
- if (!(among_var)) { z->lb = m3; return 0; }
171
- z->bra = z->c; /* ], line 41 */
172
- z->lb = m3;
173
- }
174
- switch(among_var) {
175
- case 0: return 0;
176
- case 1:
177
- { int ret;
178
- ret = slice_del(z); /* delete, line 48 */
179
- if (ret < 0) return ret;
180
- }
181
- break;
182
- case 2:
183
- if (!(in_grouping_b_U(z, g_s_ending, 97, 229))) return 0;
184
- { int ret;
185
- ret = slice_del(z); /* delete, line 50 */
186
- if (ret < 0) return ret;
187
- }
188
- break;
189
- }
190
- return 1;
191
- }
192
-
193
- static int r_consonant_pair(struct SN_env * z) {
194
- { int m_test = z->l - z->c; /* test, line 55 */
195
- { int m3; /* setlimit, line 56 */
196
- int m = z->l - z->c; (void) m;
197
- if (z->c < z->I[0]) return 0;
198
- z->c = z->I[0]; /* tomark, line 56 */
199
- m3 = z->lb; z->lb = z->c;
200
- z->c = z->l - m;
201
- z->ket = z->c; /* [, line 56 */
202
- if (!(find_among_b(z, a_1, 4))) { z->lb = m3; return 0; } /* substring, line 56 */
203
- z->bra = z->c; /* ], line 56 */
204
- z->lb = m3;
205
- }
206
- z->c = z->l - m_test;
207
- }
208
- { int c = skip_utf8(z->p, z->c, z->lb, 0, -1);
209
- if (c < 0) return 0;
210
- z->c = c; /* next, line 62 */
211
- }
212
- z->bra = z->c; /* ], line 62 */
213
- { int ret;
214
- ret = slice_del(z); /* delete, line 62 */
215
- if (ret < 0) return ret;
216
- }
217
- return 1;
218
- }
219
-
220
- static int r_other_suffix(struct SN_env * z) {
221
- int among_var;
222
- { int m = z->l - z->c; (void) m; /* do, line 66 */
223
- z->ket = z->c; /* [, line 66 */
224
- if (!(eq_s_b(z, 2, s_0))) goto lab0;
225
- z->bra = z->c; /* ], line 66 */
226
- if (!(eq_s_b(z, 2, s_1))) goto lab0;
227
- { int ret;
228
- ret = slice_del(z); /* delete, line 66 */
229
- if (ret < 0) return ret;
230
- }
231
- lab0:
232
- z->c = z->l - m;
233
- }
234
- { int m3; /* setlimit, line 67 */
235
- int m = z->l - z->c; (void) m;
236
- if (z->c < z->I[0]) return 0;
237
- z->c = z->I[0]; /* tomark, line 67 */
238
- m3 = z->lb; z->lb = z->c;
239
- z->c = z->l - m;
240
- z->ket = z->c; /* [, line 67 */
241
- among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
242
- if (!(among_var)) { z->lb = m3; return 0; }
243
- z->bra = z->c; /* ], line 67 */
244
- z->lb = m3;
245
- }
246
- switch(among_var) {
247
- case 0: return 0;
248
- case 1:
249
- { int ret;
250
- ret = slice_del(z); /* delete, line 70 */
251
- if (ret < 0) return ret;
252
- }
253
- { int m = z->l - z->c; (void) m; /* do, line 70 */
254
- { int ret = r_consonant_pair(z);
255
- if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
256
- if (ret < 0) return ret;
257
- }
258
- lab1:
259
- z->c = z->l - m;
260
- }
261
- break;
262
- case 2:
263
- { int ret;
264
- ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
265
- if (ret < 0) return ret;
266
- }
267
- break;
268
- }
269
- return 1;
270
- }
271
-
272
- static int r_undouble(struct SN_env * z) {
273
- { int m3; /* setlimit, line 76 */
274
- int m = z->l - z->c; (void) m;
275
- if (z->c < z->I[0]) return 0;
276
- z->c = z->I[0]; /* tomark, line 76 */
277
- m3 = z->lb; z->lb = z->c;
278
- z->c = z->l - m;
279
- z->ket = z->c; /* [, line 76 */
280
- if (!(out_grouping_b_U(z, g_v, 97, 248))) { z->lb = m3; return 0; }
281
- z->bra = z->c; /* ], line 76 */
282
- z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
283
- if (z->S[0] == 0) return -1; /* -> ch, line 76 */
284
- z->lb = m3;
285
- }
286
- if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
287
- { int ret;
288
- ret = slice_del(z); /* delete, line 78 */
289
- if (ret < 0) return ret;
290
- }
291
- return 1;
292
- }
293
-
294
- extern int danish_UTF_8_stem(struct SN_env * z) {
295
- { int c = z->c; /* do, line 84 */
296
- { int ret = r_mark_regions(z);
297
- if (ret == 0) goto lab0; /* call mark_regions, line 84 */
298
- if (ret < 0) return ret;
299
- }
300
- lab0:
301
- z->c = c;
302
- }
303
- z->lb = z->c; z->c = z->l; /* backwards, line 85 */
304
-
305
- { int m = z->l - z->c; (void) m; /* do, line 86 */
306
- { int ret = r_main_suffix(z);
307
- if (ret == 0) goto lab1; /* call main_suffix, line 86 */
308
- if (ret < 0) return ret;
309
- }
310
- lab1:
311
- z->c = z->l - m;
312
- }
313
- { int m = z->l - z->c; (void) m; /* do, line 87 */
314
- { int ret = r_consonant_pair(z);
315
- if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
316
- if (ret < 0) return ret;
317
- }
318
- lab2:
319
- z->c = z->l - m;
320
- }
321
- { int m = z->l - z->c; (void) m; /* do, line 88 */
322
- { int ret = r_other_suffix(z);
323
- if (ret == 0) goto lab3; /* call other_suffix, line 88 */
324
- if (ret < 0) return ret;
325
- }
326
- lab3:
327
- z->c = z->l - m;
328
- }
329
- { int m = z->l - z->c; (void) m; /* do, line 89 */
330
- { int ret = r_undouble(z);
331
- if (ret == 0) goto lab4; /* call undouble, line 89 */
332
- if (ret < 0) return ret;
333
- }
334
- lab4:
335
- z->c = z->l - m;
336
- }
337
- z->c = z->lb;
338
- return 1;
339
- }
340
-
341
- extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); }
342
-
343
- extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z); }
344
-