ruby-stemmer 0.8.1 → 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,8 @@ Ruby-Stemmer exposes SnowBall API to Ruby.
4
4
 
5
5
  This package includes the libstemmer_c library, released under the BSD licence
6
6
  and available for free at: http://snowball.tartarus.org/dist/libstemmer_c.tgz.
7
+ Support for the Latin language is also included; it was generated with the Snowball compiler using
8
+ the {Schinke contribution}[http://snowball.tartarus.org/otherapps/schinke/intro.html].
7
9
 
8
10
  For more details about libstemmer_c please visit the {SnowBall website}[http://snowball.tartarus.org].
9
11
 
@@ -87,7 +89,7 @@ For further reference on stem vs. root, please check wikipedia articles on the t
87
89
 
88
90
  == Copyright
89
91
 
90
- Copyright (c) 2008,2009 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details.
92
+ Copyright (c) 2008-2010 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details.
91
93
 
92
94
  == Contributors
93
95
 
@@ -98,4 +100,4 @@ Copyright (c) 2008,2009 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE fo
98
100
  == Real life usage
99
101
 
100
102
  * http://planet33.ru is using Ruby-Stemmer together with {Classifier}[http://github.com/yury/classifier] to automatically rate places based on users' comments.
101
-
103
+ * {taxamatch_rb}[http://github.com/dimus/taxamatch_rb] uses Ruby-Stemmer to catch errors in suffixes while determining whether two scientific names are actually the same.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.1
1
+ 0.8.2
@@ -0,0 +1 @@
1
+ lorem
@@ -3,11 +3,13 @@
3
3
  * This file is generated by mkmodules.pl from a list of module names.
4
4
  * Do not edit manually.
5
5
  *
6
- * Modules included by this file are: danish, dutch, english, finnish, french,
6
+ * Modules included by this file are: latin, danish, dutch, english, finnish, french,
7
7
  * german, hungarian, italian, norwegian, porter, portuguese, romanian,
8
8
  * russian, spanish, swedish, turkish
9
9
  */
10
10
 
11
+ #include "../src_c/stem_ISO_8859_1_latin.h"
12
+ #include "../src_c/stem_UTF_8_latin.h"
11
13
  #include "../src_c/stem_ISO_8859_1_danish.h"
12
14
  #include "../src_c/stem_UTF_8_danish.h"
13
15
  #include "../src_c/stem_ISO_8859_1_dutch.h"
@@ -68,6 +70,8 @@ struct stemmer_modules {
68
70
  int (*stem)(struct SN_env *);
69
71
  };
70
72
  static struct stemmer_modules modules[] = {
73
+ {"latin", ENC_ISO_8859_1, latin_ISO_8859_1_create_env, latin_ISO_8859_1_close_env, latin_ISO_8859_1_stem},
74
+ {"latin", ENC_UTF_8, latin_UTF_8_create_env, latin_UTF_8_close_env, latin_UTF_8_stem},
71
75
  {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
72
76
  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
73
77
  {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
@@ -170,6 +174,7 @@ static struct stemmer_modules modules[] = {
170
174
  {0,ENC_UNKNOWN,0,0,0}
171
175
  };
172
176
  static const char * algorithm_names[] = {
177
+ "latin",
173
178
  "danish",
174
179
  "dutch",
175
180
  "english",
@@ -9,6 +9,7 @@
9
9
  # List all the main algorithms for each language, in UTF-8, and also with
10
10
  # the most commonly used encoding.
11
11
 
12
+ latin UTF_8,ISO_8859_1 latin
12
13
  danish UTF_8,ISO_8859_1 danish,da,dan
13
14
  dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld
14
15
  english UTF_8,ISO_8859_1 english,en,eng
@@ -3,11 +3,12 @@
3
3
  * This file is generated by mkmodules.pl from a list of module names.
4
4
  * Do not edit manually.
5
5
  *
6
- * Modules included by this file are: danish, dutch, english, finnish, french,
6
+ * Modules included by this file are: latin, danish, dutch, english, finnish, french,
7
7
  * german, hungarian, italian, norwegian, porter, portuguese, romanian,
8
8
  * russian, spanish, swedish, turkish
9
9
  */
10
10
 
11
+ #include "../src_c/stem_UTF_8_latin.h"
11
12
  #include "../src_c/stem_UTF_8_danish.h"
12
13
  #include "../src_c/stem_UTF_8_dutch.h"
13
14
  #include "../src_c/stem_UTF_8_english.h"
@@ -47,6 +48,7 @@ struct stemmer_modules {
47
48
  int (*stem)(struct SN_env *);
48
49
  };
49
50
  static struct stemmer_modules modules[] = {
51
+ {"latin", ENC_UTF_8, latin_UTF_8_create_env, latin_UTF_8_close_env, latin_UTF_8_stem},
50
52
  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
51
53
  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
52
54
  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
@@ -8,6 +8,7 @@
8
8
 
9
9
  # List all the main algorithms for each language, in UTF-8.
10
10
 
11
+ latin UTF_8 latin
11
12
  danish UTF_8 danish,da,dan
12
13
  dutch UTF_8 dutch,nl,dut,nld
13
14
  english UTF_8 english,en,eng
@@ -3,11 +3,13 @@
3
3
  # This file is generated by mkmodules.pl from a list of module names.
4
4
  # Do not edit manually.
5
5
  #
6
- # Modules included by this file are: danish, dutch, english, finnish, french,
6
+ # Modules included by this file are: latin, danish, dutch, english, finnish, french,
7
7
  # german, hungarian, italian, norwegian, porter, portuguese, romanian,
8
8
  # russian, spanish, swedish, turkish
9
9
 
10
10
  snowball_sources= \
11
+ src_c/stem_ISO_8859_1_latin.c \
12
+ src_c/stem_UTF_8_latin.c \
11
13
  src_c/stem_ISO_8859_1_danish.c \
12
14
  src_c/stem_UTF_8_danish.c \
13
15
  src_c/stem_ISO_8859_1_dutch.c \
@@ -44,6 +46,8 @@ snowball_sources= \
44
46
  libstemmer/libstemmer.c
45
47
 
46
48
  snowball_headers= \
49
+ src_c/stem_ISO_8859_1_latin.h \
50
+ src_c/stem_UTF_8_latin.h \
47
51
  src_c/stem_ISO_8859_1_danish.h \
48
52
  src_c/stem_UTF_8_danish.h \
49
53
  src_c/stem_ISO_8859_1_dutch.h \
@@ -8,6 +8,7 @@
8
8
  # russian, spanish, swedish, turkish
9
9
 
10
10
  snowball_sources= \
11
+ src_c/stem_UTF_8_latin.c \
11
12
  src_c/stem_UTF_8_danish.c \
12
13
  src_c/stem_UTF_8_dutch.c \
13
14
  src_c/stem_UTF_8_english.c \
@@ -29,6 +30,7 @@ snowball_sources= \
29
30
  libstemmer/libstemmer_utf8.c
30
31
 
31
32
  snowball_headers= \
33
+ src_c/stem_UTF_8_latin.h \
32
34
  src_c/stem_UTF_8_danish.h \
33
35
  src_c/stem_UTF_8_dutch.h \
34
36
  src_c/stem_UTF_8_english.h \
@@ -0,0 +1,443 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #include "../runtime/header.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+ extern int latin_ISO_8859_1_stem(struct SN_env * z);
10
+ #ifdef __cplusplus
11
+ }
12
+ #endif
13
+ static int r_que_word(struct SN_env * z);
14
+ static int r_map_letters(struct SN_env * z);
15
+ #ifdef __cplusplus
16
+ extern "C" {
17
+ #endif
18
+
19
+
20
+ extern struct SN_env * latin_ISO_8859_1_create_env(void);
21
+ extern void latin_ISO_8859_1_close_env(struct SN_env * z);
22
+
23
+
24
+ #ifdef __cplusplus
25
+ }
26
+ #endif
27
+ static const symbol s_0_0[3] = { 'i', 't', 'a' };
28
+ static const symbol s_0_1[3] = { 'q', 'u', 'a' };
29
+ static const symbol s_0_2[4] = { 'a', 'd', 'a', 'e' };
30
+ static const symbol s_0_3[5] = { 'p', 'e', 'r', 'a', 'e' };
31
+ static const symbol s_0_4[4] = { 'q', 'u', 'a', 'e' };
32
+ static const symbol s_0_5[2] = { 'd', 'e' };
33
+ static const symbol s_0_6[2] = { 'n', 'e' };
34
+ static const symbol s_0_7[6] = { 'u', 't', 'r', 'i', 'b', 'i' };
35
+ static const symbol s_0_8[3] = { 'u', 'b', 'i' };
36
+ static const symbol s_0_9[4] = { 'u', 'n', 'd', 'i' };
37
+ static const symbol s_0_10[4] = { 'o', 'b', 'l', 'i' };
38
+ static const symbol s_0_11[4] = { 'd', 'e', 'n', 'i' };
39
+ static const symbol s_0_12[3] = { 'u', 't', 'i' };
40
+ static const symbol s_0_13[3] = { 'c', 'u', 'i' };
41
+ static const symbol s_0_14[3] = { 'q', 'u', 'i' };
42
+ static const symbol s_0_15[4] = { 'q', 'u', 'a', 'm' };
43
+ static const symbol s_0_16[4] = { 'q', 'u', 'e', 'm' };
44
+ static const symbol s_0_17[6] = { 'q', 'u', 'a', 'r', 'u', 'm' };
45
+ static const symbol s_0_18[6] = { 'q', 'u', 'o', 'r', 'u', 'm' };
46
+ static const symbol s_0_19[2] = { 'c', 'o' };
47
+ static const symbol s_0_20[4] = { 'd', 'e', 'c', 'o' };
48
+ static const symbol s_0_21[4] = { 'r', 'e', 'c', 'o' };
49
+ static const symbol s_0_22[4] = { 'i', 'n', 'c', 'o' };
50
+ static const symbol s_0_23[5] = { 'c', 'o', 'n', 'c', 'o' };
51
+ static const symbol s_0_24[4] = { 'e', 'x', 'c', 'o' };
52
+ static const symbol s_0_25[6] = { 'q', 'u', 'a', 'n', 'd', 'o' };
53
+ static const symbol s_0_26[4] = { 'u', 't', 'r', 'o' };
54
+ static const symbol s_0_27[3] = { 'q', 'u', 'o' };
55
+ static const symbol s_0_28[4] = { 'u', 't', 'e', 'r' };
56
+ static const symbol s_0_29[3] = { 't', 'o', 'r' };
57
+ static const symbol s_0_30[5] = { 'o', 'b', 't', 'o', 'r' };
58
+ static const symbol s_0_31[7] = { 'p', 'r', 'a', 'e', 't', 'o', 'r' };
59
+ static const symbol s_0_32[5] = { 'd', 'e', 't', 'o', 'r' };
60
+ static const symbol s_0_33[5] = { 'r', 'e', 't', 'o', 'r' };
61
+ static const symbol s_0_34[5] = { 'i', 'n', 't', 'o', 'r' };
62
+ static const symbol s_0_35[6] = { 'c', 'o', 'n', 't', 'o', 'r' };
63
+ static const symbol s_0_36[5] = { 'o', 'p', 't', 'o', 'r' };
64
+ static const symbol s_0_37[5] = { 'a', 't', 't', 'o', 'r' };
65
+ static const symbol s_0_38[5] = { 'e', 'x', 't', 'o', 'r' };
66
+ static const symbol s_0_39[4] = { 'q', 'u', 'a', 's' };
67
+ static const symbol s_0_40[3] = { 'a', 'b', 's' };
68
+ static const symbol s_0_41[6] = { 'p', 'l', 'e', 'n', 'i', 's' };
69
+ static const symbol s_0_42[4] = { 'q', 'u', 'i', 's' };
70
+ static const symbol s_0_43[10] = { 'q', 'u', 'o', 't', 'u', 's', 'q', 'u', 'i', 's' };
71
+ static const symbol s_0_44[4] = { 'q', 'u', 'o', 's' };
72
+ static const symbol s_0_45[3] = { 'a', 'p', 's' };
73
+ static const symbol s_0_46[2] = { 'u', 's' };
74
+ static const symbol s_0_47[4] = { 'a', 'b', 'u', 's' };
75
+ static const symbol s_0_48[6] = { 'q', 'u', 'i', 'b', 'u', 's' };
76
+ static const symbol s_0_49[4] = { 'a', 'd', 'u', 's' };
77
+ static const symbol s_0_50[5] = { 'c', 'u', 'i', 'u', 's' };
78
+ static const symbol s_0_51[5] = { 'q', 'u', 'o', 'u', 's' };
79
+ static const symbol s_0_52[3] = { 's', 'u', 's' };
80
+ static const symbol s_0_53[2] = { 'a', 't' };
81
+
82
+ static const struct among a_0[54] =
83
+ {
84
+ /* 0 */ { 3, s_0_0, -1, -1, 0},
85
+ /* 1 */ { 3, s_0_1, -1, -1, 0},
86
+ /* 2 */ { 4, s_0_2, -1, -1, 0},
87
+ /* 3 */ { 5, s_0_3, -1, -1, 0},
88
+ /* 4 */ { 4, s_0_4, -1, -1, 0},
89
+ /* 5 */ { 2, s_0_5, -1, -1, 0},
90
+ /* 6 */ { 2, s_0_6, -1, -1, 0},
91
+ /* 7 */ { 6, s_0_7, -1, -1, 0},
92
+ /* 8 */ { 3, s_0_8, -1, -1, 0},
93
+ /* 9 */ { 4, s_0_9, -1, -1, 0},
94
+ /* 10 */ { 4, s_0_10, -1, -1, 0},
95
+ /* 11 */ { 4, s_0_11, -1, -1, 0},
96
+ /* 12 */ { 3, s_0_12, -1, -1, 0},
97
+ /* 13 */ { 3, s_0_13, -1, -1, 0},
98
+ /* 14 */ { 3, s_0_14, -1, -1, 0},
99
+ /* 15 */ { 4, s_0_15, -1, -1, 0},
100
+ /* 16 */ { 4, s_0_16, -1, -1, 0},
101
+ /* 17 */ { 6, s_0_17, -1, -1, 0},
102
+ /* 18 */ { 6, s_0_18, -1, -1, 0},
103
+ /* 19 */ { 2, s_0_19, -1, -1, 0},
104
+ /* 20 */ { 4, s_0_20, 19, -1, 0},
105
+ /* 21 */ { 4, s_0_21, 19, -1, 0},
106
+ /* 22 */ { 4, s_0_22, 19, -1, 0},
107
+ /* 23 */ { 5, s_0_23, 19, -1, 0},
108
+ /* 24 */ { 4, s_0_24, 19, -1, 0},
109
+ /* 25 */ { 6, s_0_25, -1, -1, 0},
110
+ /* 26 */ { 4, s_0_26, -1, -1, 0},
111
+ /* 27 */ { 3, s_0_27, -1, -1, 0},
112
+ /* 28 */ { 4, s_0_28, -1, -1, 0},
113
+ /* 29 */ { 3, s_0_29, -1, -1, 0},
114
+ /* 30 */ { 5, s_0_30, 29, -1, 0},
115
+ /* 31 */ { 7, s_0_31, 29, -1, 0},
116
+ /* 32 */ { 5, s_0_32, 29, -1, 0},
117
+ /* 33 */ { 5, s_0_33, 29, -1, 0},
118
+ /* 34 */ { 5, s_0_34, 29, -1, 0},
119
+ /* 35 */ { 6, s_0_35, 29, -1, 0},
120
+ /* 36 */ { 5, s_0_36, 29, -1, 0},
121
+ /* 37 */ { 5, s_0_37, 29, -1, 0},
122
+ /* 38 */ { 5, s_0_38, 29, -1, 0},
123
+ /* 39 */ { 4, s_0_39, -1, -1, 0},
124
+ /* 40 */ { 3, s_0_40, -1, -1, 0},
125
+ /* 41 */ { 6, s_0_41, -1, -1, 0},
126
+ /* 42 */ { 4, s_0_42, -1, -1, 0},
127
+ /* 43 */ { 10, s_0_43, 42, -1, 0},
128
+ /* 44 */ { 4, s_0_44, -1, -1, 0},
129
+ /* 45 */ { 3, s_0_45, -1, -1, 0},
130
+ /* 46 */ { 2, s_0_46, -1, -1, 0},
131
+ /* 47 */ { 4, s_0_47, 46, -1, 0},
132
+ /* 48 */ { 6, s_0_48, 46, -1, 0},
133
+ /* 49 */ { 4, s_0_49, 46, -1, 0},
134
+ /* 50 */ { 5, s_0_50, 46, -1, 0},
135
+ /* 51 */ { 5, s_0_51, 46, -1, 0},
136
+ /* 52 */ { 3, s_0_52, 46, -1, 0},
137
+ /* 53 */ { 2, s_0_53, -1, -1, 0}
138
+ };
139
+
140
+ static const symbol s_1_0[1] = { 'a' };
141
+ static const symbol s_1_1[2] = { 'i', 'a' };
142
+ static const symbol s_1_2[2] = { 'u', 'd' };
143
+ static const symbol s_1_3[1] = { 'e' };
144
+ static const symbol s_1_4[2] = { 'a', 'e' };
145
+ static const symbol s_1_5[1] = { 'i' };
146
+ static const symbol s_1_6[2] = { 'a', 'm' };
147
+ static const symbol s_1_7[2] = { 'e', 'm' };
148
+ static const symbol s_1_8[2] = { 'u', 'm' };
149
+ static const symbol s_1_9[1] = { 'o' };
150
+ static const symbol s_1_10[2] = { 'a', 's' };
151
+ static const symbol s_1_11[2] = { 'e', 's' };
152
+ static const symbol s_1_12[2] = { 'i', 's' };
153
+ static const symbol s_1_13[2] = { 'o', 's' };
154
+ static const symbol s_1_14[2] = { 'u', 's' };
155
+ static const symbol s_1_15[4] = { 'i', 'b', 'u', 's' };
156
+ static const symbol s_1_16[3] = { 'i', 'u', 's' };
157
+ static const symbol s_1_17[2] = { 'n', 't' };
158
+ static const symbol s_1_18[1] = { 'u' };
159
+
160
+ static const struct among a_1[19] =
161
+ {
162
+ /* 0 */ { 1, s_1_0, -1, 1, 0},
163
+ /* 1 */ { 2, s_1_1, 0, 1, 0},
164
+ /* 2 */ { 2, s_1_2, -1, 1, 0},
165
+ /* 3 */ { 1, s_1_3, -1, 1, 0},
166
+ /* 4 */ { 2, s_1_4, 3, 1, 0},
167
+ /* 5 */ { 1, s_1_5, -1, 1, 0},
168
+ /* 6 */ { 2, s_1_6, -1, 1, 0},
169
+ /* 7 */ { 2, s_1_7, -1, 1, 0},
170
+ /* 8 */ { 2, s_1_8, -1, 1, 0},
171
+ /* 9 */ { 1, s_1_9, -1, 1, 0},
172
+ /* 10 */ { 2, s_1_10, -1, 1, 0},
173
+ /* 11 */ { 2, s_1_11, -1, 1, 0},
174
+ /* 12 */ { 2, s_1_12, -1, 1, 0},
175
+ /* 13 */ { 2, s_1_13, -1, 1, 0},
176
+ /* 14 */ { 2, s_1_14, -1, 1, 0},
177
+ /* 15 */ { 4, s_1_15, 14, 1, 0},
178
+ /* 16 */ { 3, s_1_16, 14, 1, 0},
179
+ /* 17 */ { 2, s_1_17, -1, 1, 0},
180
+ /* 18 */ { 1, s_1_18, -1, 1, 0}
181
+ };
182
+
183
+ static const symbol s_2_0[4] = { 'm', 'i', 'n', 'i' };
184
+ static const symbol s_2_1[2] = { 'r', 'i' };
185
+ static const symbol s_2_2[3] = { 's', 't', 'i' };
186
+ static const symbol s_2_3[1] = { 'm' };
187
+ static const symbol s_2_4[2] = { 'b', 'o' };
188
+ static const symbol s_2_5[3] = { 'e', 'r', 'o' };
189
+ static const symbol s_2_6[1] = { 'r' };
190
+ static const symbol s_2_7[3] = { 'b', 'o', 'r' };
191
+ static const symbol s_2_8[3] = { 'm', 'u', 'r' };
192
+ static const symbol s_2_9[3] = { 't', 'u', 'r' };
193
+ static const symbol s_2_10[4] = { 'n', 't', 'u', 'r' };
194
+ static const symbol s_2_11[5] = { 'u', 'n', 't', 'u', 'r' };
195
+ static const symbol s_2_12[6] = { 'i', 'u', 'n', 't', 'u', 'r' };
196
+ static const symbol s_2_13[1] = { 's' };
197
+ static const symbol s_2_14[3] = { 'r', 'i', 's' };
198
+ static const symbol s_2_15[5] = { 'b', 'e', 'r', 'i', 's' };
199
+ static const symbol s_2_16[3] = { 't', 'i', 's' };
200
+ static const symbol s_2_17[4] = { 's', 't', 'i', 's' };
201
+ static const symbol s_2_18[2] = { 'n', 's' };
202
+ static const symbol s_2_19[3] = { 'm', 'u', 's' };
203
+ static const symbol s_2_20[1] = { 't' };
204
+ static const symbol s_2_21[2] = { 'n', 't' };
205
+ static const symbol s_2_22[3] = { 'u', 'n', 't' };
206
+ static const symbol s_2_23[4] = { 'i', 'u', 'n', 't' };
207
+ static const symbol s_2_24[5] = { 'e', 'r', 'u', 'n', 't' };
208
+
209
+ static const struct among a_2[25] =
210
+ {
211
+ /* 0 */ { 4, s_2_0, -1, 4, 0},
212
+ /* 1 */ { 2, s_2_1, -1, 4, 0},
213
+ /* 2 */ { 3, s_2_2, -1, 4, 0},
214
+ /* 3 */ { 1, s_2_3, -1, 4, 0},
215
+ /* 4 */ { 2, s_2_4, -1, 2, 0},
216
+ /* 5 */ { 3, s_2_5, -1, 3, 0},
217
+ /* 6 */ { 1, s_2_6, -1, 4, 0},
218
+ /* 7 */ { 3, s_2_7, 6, 2, 0},
219
+ /* 8 */ { 3, s_2_8, 6, 4, 0},
220
+ /* 9 */ { 3, s_2_9, 6, 4, 0},
221
+ /* 10 */ { 4, s_2_10, 9, 4, 0},
222
+ /* 11 */ { 5, s_2_11, 10, 1, 0},
223
+ /* 12 */ { 6, s_2_12, 11, 1, 0},
224
+ /* 13 */ { 1, s_2_13, -1, 4, 0},
225
+ /* 14 */ { 3, s_2_14, 13, 4, 0},
226
+ /* 15 */ { 5, s_2_15, 14, 2, 0},
227
+ /* 16 */ { 3, s_2_16, 13, 4, 0},
228
+ /* 17 */ { 4, s_2_17, 16, 4, 0},
229
+ /* 18 */ { 2, s_2_18, 13, 4, 0},
230
+ /* 19 */ { 3, s_2_19, 13, 4, 0},
231
+ /* 20 */ { 1, s_2_20, -1, 4, 0},
232
+ /* 21 */ { 2, s_2_21, 20, 4, 0},
233
+ /* 22 */ { 3, s_2_22, 21, 1, 0},
234
+ /* 23 */ { 4, s_2_23, 22, 1, 0},
235
+ /* 24 */ { 5, s_2_24, 22, 1, 0}
236
+ };
237
+
238
+ static const symbol s_0[] = { 'j' };
239
+ static const symbol s_1[] = { 'i' };
240
+ static const symbol s_2[] = { 'v' };
241
+ static const symbol s_3[] = { 'u' };
242
+ static const symbol s_4[] = { 'q', 'u', 'e' };
243
+ static const symbol s_5[] = { 'i' };
244
+ static const symbol s_6[] = { 'b', 'i' };
245
+ static const symbol s_7[] = { 'e', 'r', 'i' };
246
+
247
/* r_map_letters: forward pass over z->p from the current cursor to z->l.
 * The first block rewrites each 'j' (s_0) as 'i' (s_1); the second block
 * rewrites each 'v' (s_2) as 'u' (s_3). The cursor is restored to its entry
 * value after each block. Returns 1 when both passes finish, or the negative
 * value returned by slice_from_s. */
+ static int r_map_letters(struct SN_env * z) {
248
+ { int c1 = z->c; /* do, line 14 */ /* c1: cursor on entry, restored at the end of this block */
249
+ while(1) { /* repeat, line 14 */
250
+ int c2 = z->c; /* c2: cursor at the start of this iteration */
251
+ while(1) { /* goto, line 14 */ /* scan forward one position at a time looking for 'j' */
252
+ int c3 = z->c;
253
+ z->bra = z->c; /* [, line 14 */
254
+ if (!(eq_s(z, 1, s_0))) goto lab2; /* no 'j' here (eq_s is a runtime helper; presumably advances the cursor on a match) */
255
+ z->ket = z->c; /* ], line 14 */
256
+ z->c = c3; /* leave the cursor at the start of the marked slice */
257
+ break;
258
+ lab2:
259
+ z->c = c3;
260
+ if (z->c >= z->l) goto lab1; /* reached the limit without finding 'j': end the repeat */
261
+ z->c++; /* goto, line 14 */
262
+ }
263
+ { int ret = slice_from_s(z, 1, s_1); /* <-, line 14 */ /* replace the marked slice with 'i' */
264
+ if (ret < 0) return ret;
265
+ }
266
+ continue;
267
+ lab1:
268
+ z->c = c2;
269
+ break;
270
+ }
271
+ z->c = c1;
272
+ }
273
+ { int c4 = z->c; /* do, line 15 */ /* same structure as above, mapping 'v' to 'u' */
274
+ while(1) { /* repeat, line 15 */
275
+ int c5 = z->c;
276
+ while(1) { /* goto, line 15 */
277
+ int c6 = z->c;
278
+ z->bra = z->c; /* [, line 15 */
279
+ if (!(eq_s(z, 1, s_2))) goto lab5; /* no 'v' at this position */
280
+ z->ket = z->c; /* ], line 15 */
281
+ z->c = c6;
282
+ break;
283
+ lab5:
284
+ z->c = c6;
285
+ if (z->c >= z->l) goto lab4; /* reached the limit without finding 'v' */
286
+ z->c++; /* goto, line 15 */
287
+ }
288
+ { int ret = slice_from_s(z, 1, s_3); /* <-, line 15 */ /* replace the marked slice with 'u' */
289
+ if (ret < 0) return ret;
290
+ }
291
+ continue;
292
+ lab4:
293
+ z->c = c5;
294
+ break;
295
+ }
296
+ z->c = c4;
297
+ }
298
+ return 1;
299
+ }
300
+
301
/* r_que_word: backward-mode handling of a trailing "que" (s_4).
 * Returns 0 immediately when the text before the cursor does not end in "que".
 * Returns 1 when what precedes "que" is, in full, one of the a_0 entries; the
 * text is then left untouched and S[0]/S[1] are refreshed through assign_to
 * (presumably a copy of the current string - confirm against the runtime).
 * Otherwise deletes the "que" slice and returns 0.
 * Returns -1 if assign_to yields a null pointer, or the negative slice_del result. */
+ static int r_que_word(struct SN_env * z) {
302
+ z->ket = z->c; /* [, line 22 */
303
+ if (!(eq_s_b(z, 3, s_4))) return 0; /* no trailing "que": nothing to do */
304
+ z->bra = z->c; /* ], line 22 */
305
+ { int m1 = z->l - z->c; (void)m1; /* or, line 35 */ /* m1: distance from the limit, used to restore the cursor at lab1 */
306
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876514 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; /* pre-filter: needs at least 2 preceding characters and a preceding byte in 0x60-0x7f whose bit is set in 1876514, i.e. one of a,e,i,m,o,r,s,t (the final letters of the a_0 entries) */
307
+ if (!(find_among_b(z, a_0, 54))) goto lab1; /* among, line 23 */
308
+ if (z->c > z->lb) goto lab1; /* atlimit, line 32 */ /* the match must reach the backward limit, i.e. cover everything before "que" */
309
+ z->bra = z->c; /* ], line 32 */
310
+ z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 33 */
311
+ if (z->S[0] == 0) return -1; /* => noun_form, line 33 */
312
+ z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 34 */
313
+ if (z->S[1] == 0) return -1; /* => verb_form, line 34 */
314
+ goto lab0;
315
+ lab1:
316
+ z->c = z->l - m1; /* back to the position just before "que" */
317
+ { int ret = slice_del(z); /* delete, line 35 */ /* bra/ket still bracket "que" on this path, so this removes it */
318
+ if (ret < 0) return ret;
319
+ }
320
+ return 0; /* fail, line 35 */
321
+ }
322
+ lab0:
323
+ return 1;
324
+ }
325
+
326
/* latin_ISO_8859_1_stem: entry point of the Latin stemmer (ISO-8859-1 build).
 * Steps, as visible below:
 *   1. r_map_letters normalises j->i and v->u.
 *   2. In backward mode, r_que_word is tried; when it returns 1 the rule
 *      sections are skipped.
 *   3. Otherwise the current string is stored in S[0] (noun_form) and S[1]
 *      (verb_form), and each is rewritten independently: noun suffixes from
 *      a_1 are deleted, verb suffixes from a_2 are deleted or replaced.
 *   4. insert_v is called with S[0] over the range [c, l) (presumably
 *      replacing the working string with noun_form - confirm against the runtime).
 * Returns 1 on success, -1 when assign_to yields a null pointer, or the
 * negative value reported by a runtime helper.
 * NOTE(review): S[1] is computed here but never passed to insert_v; callers
 * that want the verb stem would have to read it from z->S[1] - confirm. */
+ extern int latin_ISO_8859_1_stem(struct SN_env * z) {
327
+ int among_var;
328
+ { int ret = r_map_letters(z);
329
+ if (ret == 0) return 0; /* call map_letters, line 41 */
330
+ if (ret < 0) return ret;
331
+ }
332
+ z->lb = z->c; z->c = z->l; /* backwards, line 43 */ /* from here on the cursor moves from the end towards lb */
333
+
334
+ { int m1 = z->l - z->c; (void)m1; /* or, line 44 */
335
+ { int ret = r_que_word(z);
336
+ if (ret == 0) goto lab1; /* call que_word, line 44 */
337
+ if (ret < 0) return ret;
338
+ }
339
+ goto lab0; /* r_que_word returned 1: skip the noun/verb rules */
340
+ lab1:
341
+ z->c = z->l - m1;
342
+ z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 45 */
343
+ if (z->S[0] == 0) return -1; /* => noun_form, line 45 */
344
+ z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 46 */
345
+ if (z->S[1] == 0) return -1; /* => verb_form, line 46 */
346
+ { struct SN_env env = * z; /* $ noun_form, line 48 */ /* save the whole environment, then run the rules on S[0] as if it were the working string */
347
+ int failure = 1; /* assume failure */
348
+ z->p = z->S[0];
349
+ z->lb = z->c = 0;
350
+ z->l = SIZE(z->p);
351
+ z->lb = z->c; z->c = z->l; /* backwards, line 48 */
352
+
353
+ { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 48 */
354
+ z->ket = z->c; /* [, line 49 */
355
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3711538 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } /* pre-filter: last byte must be one of a,d,e,i,m,o,s,t,u (the final letters of the a_1 entries) */
356
+ among_var = find_among_b(z, a_1, 19); /* substring, line 49 */
357
+ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; }
358
+ z->bra = z->c; /* ], line 49 */
359
+ { int ret = z->c - 2;
360
+ if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab3; } /* fewer than 2 characters before the suffix: leave S[0] unchanged */
361
+ z->c = ret; /* hop, line 49 */
362
+ }
363
+ switch(among_var) {
364
+ case 0: { z->c = z->l - m_keep; goto lab3; }
365
+ case 1: /* every a_1 entry carries result 1: delete the suffix */
366
+ { int ret = slice_del(z); /* delete, line 53 */
367
+ if (ret < 0) return ret; /* NOTE(review): returns before "* z = env" below, so z->p still points at S[0]'s buffer - confirm callers tolerate this */
368
+ }
369
+ break;
370
+ }
371
+ lab3:
372
+ ;
373
+ }
374
+ z->c = z->lb;
375
+ failure = 0; /* mark success */
376
+ z->S[0] = z->p; /* store the (possibly changed) buffer pointer back into slot 0 before restoring */
377
+ * z = env;
378
+ if (failure) return 0; /* failure was set to 0 three lines above, so this return is never taken */
379
+ }
380
+ { struct SN_env env = * z; /* $ verb_form, line 57 */ /* same save/redirect/restore pattern, applied to S[1] */
381
+ int failure = 1; /* assume failure */
382
+ z->p = z->S[1];
383
+ z->lb = z->c = 0;
384
+ z->l = SIZE(z->p);
385
+ z->lb = z->c; z->c = z->l; /* backwards, line 57 */
386
+
387
+ { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 57 */
388
+ z->ket = z->c; /* [, line 58 */
389
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876480 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab5; } /* pre-filter: last byte must be one of i,m,o,r,s,t (the final letters of the a_2 entries) */
390
+ among_var = find_among_b(z, a_2, 25); /* substring, line 58 */
391
+ if (!(among_var)) { z->c = z->l - m_keep; goto lab5; }
392
+ z->bra = z->c; /* ], line 58 */
393
+ { int ret = z->c - 2;
394
+ if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab5; } /* fewer than 2 characters before the suffix: leave S[1] unchanged */
395
+ z->c = ret; /* hop, line 58 */
396
+ }
397
+ switch(among_var) {
398
+ case 0: { z->c = z->l - m_keep; goto lab5; }
399
+ case 1: /* a_2 result 1 (untur, iuntur, unt, iunt, erunt): replace the suffix with "i" */
400
+ { int ret = slice_from_s(z, 1, s_5); /* <-, line 61 */
401
+ if (ret < 0) return ret;
402
+ }
403
+ break;
404
+ case 2: /* a_2 result 2 (bo, bor, beris): replace the suffix with "bi" */
405
+ { int ret = slice_from_s(z, 2, s_6); /* <-, line 63 */
406
+ if (ret < 0) return ret;
407
+ }
408
+ break;
409
+ case 3: /* a_2 result 3 (ero): replace the suffix with "eri" */
410
+ { int ret = slice_from_s(z, 3, s_7); /* <-, line 65 */
411
+ if (ret < 0) return ret;
412
+ }
413
+ break;
414
+ case 4: /* a_2 result 4 (all remaining entries): delete the suffix */
415
+ { int ret = slice_del(z); /* delete, line 68 */
416
+ if (ret < 0) return ret;
417
+ }
418
+ break;
419
+ }
420
+ lab5:
421
+ ;
422
+ }
423
+ z->c = z->lb;
424
+ failure = 0; /* mark success */
425
+ z->S[1] = z->p; /* store the (possibly changed) buffer pointer back into slot 1 before restoring */
426
+ * z = env;
427
+ if (failure) return 0; /* never taken: failure is always 0 here */
428
+ }
429
+ }
430
+ lab0:
431
+ z->c = z->lb;
432
+ { int c_keep = z->c;
433
+ int ret = insert_v(z, z->c, z->l, z->S[0]); /* = noun_form, line 74 */ /* only S[0] is passed; the range is [c, l) with c == lb */
434
+ z->c = c_keep;
435
+ if (ret < 0) return ret;
436
+ }
437
+ return 1;
438
+ }
439
+
440
/* Creates the stemmer environment via SN_create_env(2, 0, 0). The first argument presumably requests the two string slots (S[0], S[1]) used by the stem routine - confirm against the runtime. */
+ extern struct SN_env * latin_ISO_8859_1_create_env(void) { return SN_create_env(2, 0, 0); }
441
+
442
/* Releases an environment obtained from latin_ISO_8859_1_create_env; the 2 matches the first argument given to SN_create_env there. */
+ extern void latin_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 2); }
443
+
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * latin_ISO_8859_1_create_env(void);
9
+ extern void latin_ISO_8859_1_close_env(struct SN_env * z);
10
+
11
+ extern int latin_ISO_8859_1_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,443 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #include "../runtime/header.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+ extern int latin_UTF_8_stem(struct SN_env * z);
10
+ #ifdef __cplusplus
11
+ }
12
+ #endif
13
+ static int r_que_word(struct SN_env * z);
14
+ static int r_map_letters(struct SN_env * z);
15
+ #ifdef __cplusplus
16
+ extern "C" {
17
+ #endif
18
+
19
+
20
+ extern struct SN_env * latin_UTF_8_create_env(void);
21
+ extern void latin_UTF_8_close_env(struct SN_env * z);
22
+
23
+
24
+ #ifdef __cplusplus
25
+ }
26
+ #endif
27
+ static const symbol s_0_0[3] = { 'i', 't', 'a' };
28
+ static const symbol s_0_1[3] = { 'q', 'u', 'a' };
29
+ static const symbol s_0_2[4] = { 'a', 'd', 'a', 'e' };
30
+ static const symbol s_0_3[5] = { 'p', 'e', 'r', 'a', 'e' };
31
+ static const symbol s_0_4[4] = { 'q', 'u', 'a', 'e' };
32
+ static const symbol s_0_5[2] = { 'd', 'e' };
33
+ static const symbol s_0_6[2] = { 'n', 'e' };
34
+ static const symbol s_0_7[6] = { 'u', 't', 'r', 'i', 'b', 'i' };
35
+ static const symbol s_0_8[3] = { 'u', 'b', 'i' };
36
+ static const symbol s_0_9[4] = { 'u', 'n', 'd', 'i' };
37
+ static const symbol s_0_10[4] = { 'o', 'b', 'l', 'i' };
38
+ static const symbol s_0_11[4] = { 'd', 'e', 'n', 'i' };
39
+ static const symbol s_0_12[3] = { 'u', 't', 'i' };
40
+ static const symbol s_0_13[3] = { 'c', 'u', 'i' };
41
+ static const symbol s_0_14[3] = { 'q', 'u', 'i' };
42
+ static const symbol s_0_15[4] = { 'q', 'u', 'a', 'm' };
43
+ static const symbol s_0_16[4] = { 'q', 'u', 'e', 'm' };
44
+ static const symbol s_0_17[6] = { 'q', 'u', 'a', 'r', 'u', 'm' };
45
+ static const symbol s_0_18[6] = { 'q', 'u', 'o', 'r', 'u', 'm' };
46
+ static const symbol s_0_19[2] = { 'c', 'o' };
47
+ static const symbol s_0_20[4] = { 'd', 'e', 'c', 'o' };
48
+ static const symbol s_0_21[4] = { 'r', 'e', 'c', 'o' };
49
+ static const symbol s_0_22[4] = { 'i', 'n', 'c', 'o' };
50
+ static const symbol s_0_23[5] = { 'c', 'o', 'n', 'c', 'o' };
51
+ static const symbol s_0_24[4] = { 'e', 'x', 'c', 'o' };
52
+ static const symbol s_0_25[6] = { 'q', 'u', 'a', 'n', 'd', 'o' };
53
+ static const symbol s_0_26[4] = { 'u', 't', 'r', 'o' };
54
+ static const symbol s_0_27[3] = { 'q', 'u', 'o' };
55
+ static const symbol s_0_28[4] = { 'u', 't', 'e', 'r' };
56
+ static const symbol s_0_29[3] = { 't', 'o', 'r' };
57
+ static const symbol s_0_30[5] = { 'o', 'b', 't', 'o', 'r' };
58
+ static const symbol s_0_31[7] = { 'p', 'r', 'a', 'e', 't', 'o', 'r' };
59
+ static const symbol s_0_32[5] = { 'd', 'e', 't', 'o', 'r' };
60
+ static const symbol s_0_33[5] = { 'r', 'e', 't', 'o', 'r' };
61
+ static const symbol s_0_34[5] = { 'i', 'n', 't', 'o', 'r' };
62
+ static const symbol s_0_35[6] = { 'c', 'o', 'n', 't', 'o', 'r' };
63
+ static const symbol s_0_36[5] = { 'o', 'p', 't', 'o', 'r' };
64
+ static const symbol s_0_37[5] = { 'a', 't', 't', 'o', 'r' };
65
+ static const symbol s_0_38[5] = { 'e', 'x', 't', 'o', 'r' };
66
+ static const symbol s_0_39[4] = { 'q', 'u', 'a', 's' };
67
+ static const symbol s_0_40[3] = { 'a', 'b', 's' };
68
+ static const symbol s_0_41[6] = { 'p', 'l', 'e', 'n', 'i', 's' };
69
+ static const symbol s_0_42[4] = { 'q', 'u', 'i', 's' };
70
+ static const symbol s_0_43[10] = { 'q', 'u', 'o', 't', 'u', 's', 'q', 'u', 'i', 's' };
71
+ static const symbol s_0_44[4] = { 'q', 'u', 'o', 's' };
72
+ static const symbol s_0_45[3] = { 'a', 'p', 's' };
73
+ static const symbol s_0_46[2] = { 'u', 's' };
74
+ static const symbol s_0_47[4] = { 'a', 'b', 'u', 's' };
75
+ static const symbol s_0_48[6] = { 'q', 'u', 'i', 'b', 'u', 's' };
76
+ static const symbol s_0_49[4] = { 'a', 'd', 'u', 's' };
77
+ static const symbol s_0_50[5] = { 'c', 'u', 'i', 'u', 's' };
78
+ static const symbol s_0_51[5] = { 'q', 'u', 'o', 'u', 's' };
79
+ static const symbol s_0_52[3] = { 's', 'u', 's' };
80
+ static const symbol s_0_53[2] = { 'a', 't' };
81
+
82
+ static const struct among a_0[54] =
83
+ {
84
+ /* 0 */ { 3, s_0_0, -1, -1, 0},
85
+ /* 1 */ { 3, s_0_1, -1, -1, 0},
86
+ /* 2 */ { 4, s_0_2, -1, -1, 0},
87
+ /* 3 */ { 5, s_0_3, -1, -1, 0},
88
+ /* 4 */ { 4, s_0_4, -1, -1, 0},
89
+ /* 5 */ { 2, s_0_5, -1, -1, 0},
90
+ /* 6 */ { 2, s_0_6, -1, -1, 0},
91
+ /* 7 */ { 6, s_0_7, -1, -1, 0},
92
+ /* 8 */ { 3, s_0_8, -1, -1, 0},
93
+ /* 9 */ { 4, s_0_9, -1, -1, 0},
94
+ /* 10 */ { 4, s_0_10, -1, -1, 0},
95
+ /* 11 */ { 4, s_0_11, -1, -1, 0},
96
+ /* 12 */ { 3, s_0_12, -1, -1, 0},
97
+ /* 13 */ { 3, s_0_13, -1, -1, 0},
98
+ /* 14 */ { 3, s_0_14, -1, -1, 0},
99
+ /* 15 */ { 4, s_0_15, -1, -1, 0},
100
+ /* 16 */ { 4, s_0_16, -1, -1, 0},
101
+ /* 17 */ { 6, s_0_17, -1, -1, 0},
102
+ /* 18 */ { 6, s_0_18, -1, -1, 0},
103
+ /* 19 */ { 2, s_0_19, -1, -1, 0},
104
+ /* 20 */ { 4, s_0_20, 19, -1, 0},
105
+ /* 21 */ { 4, s_0_21, 19, -1, 0},
106
+ /* 22 */ { 4, s_0_22, 19, -1, 0},
107
+ /* 23 */ { 5, s_0_23, 19, -1, 0},
108
+ /* 24 */ { 4, s_0_24, 19, -1, 0},
109
+ /* 25 */ { 6, s_0_25, -1, -1, 0},
110
+ /* 26 */ { 4, s_0_26, -1, -1, 0},
111
+ /* 27 */ { 3, s_0_27, -1, -1, 0},
112
+ /* 28 */ { 4, s_0_28, -1, -1, 0},
113
+ /* 29 */ { 3, s_0_29, -1, -1, 0},
114
+ /* 30 */ { 5, s_0_30, 29, -1, 0},
115
+ /* 31 */ { 7, s_0_31, 29, -1, 0},
116
+ /* 32 */ { 5, s_0_32, 29, -1, 0},
117
+ /* 33 */ { 5, s_0_33, 29, -1, 0},
118
+ /* 34 */ { 5, s_0_34, 29, -1, 0},
119
+ /* 35 */ { 6, s_0_35, 29, -1, 0},
120
+ /* 36 */ { 5, s_0_36, 29, -1, 0},
121
+ /* 37 */ { 5, s_0_37, 29, -1, 0},
122
+ /* 38 */ { 5, s_0_38, 29, -1, 0},
123
+ /* 39 */ { 4, s_0_39, -1, -1, 0},
124
+ /* 40 */ { 3, s_0_40, -1, -1, 0},
125
+ /* 41 */ { 6, s_0_41, -1, -1, 0},
126
+ /* 42 */ { 4, s_0_42, -1, -1, 0},
127
+ /* 43 */ { 10, s_0_43, 42, -1, 0},
128
+ /* 44 */ { 4, s_0_44, -1, -1, 0},
129
+ /* 45 */ { 3, s_0_45, -1, -1, 0},
130
+ /* 46 */ { 2, s_0_46, -1, -1, 0},
131
+ /* 47 */ { 4, s_0_47, 46, -1, 0},
132
+ /* 48 */ { 6, s_0_48, 46, -1, 0},
133
+ /* 49 */ { 4, s_0_49, 46, -1, 0},
134
+ /* 50 */ { 5, s_0_50, 46, -1, 0},
135
+ /* 51 */ { 5, s_0_51, 46, -1, 0},
136
+ /* 52 */ { 3, s_0_52, 46, -1, 0},
137
+ /* 53 */ { 2, s_0_53, -1, -1, 0}
138
+ };
139
+
140
+ static const symbol s_1_0[1] = { 'a' };
141
+ static const symbol s_1_1[2] = { 'i', 'a' };
142
+ static const symbol s_1_2[2] = { 'u', 'd' };
143
+ static const symbol s_1_3[1] = { 'e' };
144
+ static const symbol s_1_4[2] = { 'a', 'e' };
145
+ static const symbol s_1_5[1] = { 'i' };
146
+ static const symbol s_1_6[2] = { 'a', 'm' };
147
+ static const symbol s_1_7[2] = { 'e', 'm' };
148
+ static const symbol s_1_8[2] = { 'u', 'm' };
149
+ static const symbol s_1_9[1] = { 'o' };
150
+ static const symbol s_1_10[2] = { 'a', 's' };
151
+ static const symbol s_1_11[2] = { 'e', 's' };
152
+ static const symbol s_1_12[2] = { 'i', 's' };
153
+ static const symbol s_1_13[2] = { 'o', 's' };
154
+ static const symbol s_1_14[2] = { 'u', 's' };
155
+ static const symbol s_1_15[4] = { 'i', 'b', 'u', 's' };
156
+ static const symbol s_1_16[3] = { 'i', 'u', 's' };
157
+ static const symbol s_1_17[2] = { 'n', 't' };
158
+ static const symbol s_1_18[1] = { 'u' };
159
+
160
+ static const struct among a_1[19] =
161
+ {
162
+ /* 0 */ { 1, s_1_0, -1, 1, 0},
163
+ /* 1 */ { 2, s_1_1, 0, 1, 0},
164
+ /* 2 */ { 2, s_1_2, -1, 1, 0},
165
+ /* 3 */ { 1, s_1_3, -1, 1, 0},
166
+ /* 4 */ { 2, s_1_4, 3, 1, 0},
167
+ /* 5 */ { 1, s_1_5, -1, 1, 0},
168
+ /* 6 */ { 2, s_1_6, -1, 1, 0},
169
+ /* 7 */ { 2, s_1_7, -1, 1, 0},
170
+ /* 8 */ { 2, s_1_8, -1, 1, 0},
171
+ /* 9 */ { 1, s_1_9, -1, 1, 0},
172
+ /* 10 */ { 2, s_1_10, -1, 1, 0},
173
+ /* 11 */ { 2, s_1_11, -1, 1, 0},
174
+ /* 12 */ { 2, s_1_12, -1, 1, 0},
175
+ /* 13 */ { 2, s_1_13, -1, 1, 0},
176
+ /* 14 */ { 2, s_1_14, -1, 1, 0},
177
+ /* 15 */ { 4, s_1_15, 14, 1, 0},
178
+ /* 16 */ { 3, s_1_16, 14, 1, 0},
179
+ /* 17 */ { 2, s_1_17, -1, 1, 0},
180
+ /* 18 */ { 1, s_1_18, -1, 1, 0}
181
+ };
182
+
183
/* Ending strings s_2_0 .. s_2_24 and the a_2 table built from them. The table
 * is searched with find_among_b(z, a_2, 25) in the verb_form pass of
 * latin_UTF_8_stem, which switches on the result code of the matched entry:
 *   1 -> replace the ending with "i"   (entries 11, 12, 22, 23, 24)
 *   2 -> replace the ending with "bi"  (entries 4, 7, 15)
 *   3 -> replace the ending with "eri" (entry 5)
 *   4 -> delete the ending             (all other entries)
 * The third field of each entry is -1 or the index of an earlier entry whose
 * string is a suffix of this one (e.g. 10 "ntur" -> 9 "tur" -> 6 "r").
 * NOTE(review): struct among is declared outside this view -- confirm the
 * field meanings against the Snowball runtime header. */
+ static const symbol s_2_0[4] = { 'm', 'i', 'n', 'i' };
184
+ static const symbol s_2_1[2] = { 'r', 'i' };
185
+ static const symbol s_2_2[3] = { 's', 't', 'i' };
186
+ static const symbol s_2_3[1] = { 'm' };
187
+ static const symbol s_2_4[2] = { 'b', 'o' };
188
+ static const symbol s_2_5[3] = { 'e', 'r', 'o' };
189
+ static const symbol s_2_6[1] = { 'r' };
190
+ static const symbol s_2_7[3] = { 'b', 'o', 'r' };
191
+ static const symbol s_2_8[3] = { 'm', 'u', 'r' };
192
+ static const symbol s_2_9[3] = { 't', 'u', 'r' };
193
+ static const symbol s_2_10[4] = { 'n', 't', 'u', 'r' };
194
+ static const symbol s_2_11[5] = { 'u', 'n', 't', 'u', 'r' };
195
+ static const symbol s_2_12[6] = { 'i', 'u', 'n', 't', 'u', 'r' };
196
+ static const symbol s_2_13[1] = { 's' };
197
+ static const symbol s_2_14[3] = { 'r', 'i', 's' };
198
+ static const symbol s_2_15[5] = { 'b', 'e', 'r', 'i', 's' };
199
+ static const symbol s_2_16[3] = { 't', 'i', 's' };
200
+ static const symbol s_2_17[4] = { 's', 't', 'i', 's' };
201
+ static const symbol s_2_18[2] = { 'n', 's' };
202
+ static const symbol s_2_19[3] = { 'm', 'u', 's' };
203
+ static const symbol s_2_20[1] = { 't' };
204
+ static const symbol s_2_21[2] = { 'n', 't' };
205
+ static const symbol s_2_22[3] = { 'u', 'n', 't' };
206
+ static const symbol s_2_23[4] = { 'i', 'u', 'n', 't' };
207
+ static const symbol s_2_24[5] = { 'e', 'r', 'u', 'n', 't' };
208
+
209
+ static const struct among a_2[25] =
210
+ {
211
+ /* 0 */ { 4, s_2_0, -1, 4, 0},
212
+ /* 1 */ { 2, s_2_1, -1, 4, 0},
213
+ /* 2 */ { 3, s_2_2, -1, 4, 0},
214
+ /* 3 */ { 1, s_2_3, -1, 4, 0},
215
+ /* 4 */ { 2, s_2_4, -1, 2, 0},
216
+ /* 5 */ { 3, s_2_5, -1, 3, 0},
217
+ /* 6 */ { 1, s_2_6, -1, 4, 0},
218
+ /* 7 */ { 3, s_2_7, 6, 2, 0},
219
+ /* 8 */ { 3, s_2_8, 6, 4, 0},
220
+ /* 9 */ { 3, s_2_9, 6, 4, 0},
221
+ /* 10 */ { 4, s_2_10, 9, 4, 0},
222
+ /* 11 */ { 5, s_2_11, 10, 1, 0},
223
+ /* 12 */ { 6, s_2_12, 11, 1, 0},
224
+ /* 13 */ { 1, s_2_13, -1, 4, 0},
225
+ /* 14 */ { 3, s_2_14, 13, 4, 0},
226
+ /* 15 */ { 5, s_2_15, 14, 2, 0},
227
+ /* 16 */ { 3, s_2_16, 13, 4, 0},
228
+ /* 17 */ { 4, s_2_17, 16, 4, 0},
229
+ /* 18 */ { 2, s_2_18, 13, 4, 0},
230
+ /* 19 */ { 3, s_2_19, 13, 4, 0},
231
+ /* 20 */ { 1, s_2_20, -1, 4, 0},
232
+ /* 21 */ { 2, s_2_21, 20, 4, 0},
233
+ /* 22 */ { 3, s_2_22, 21, 1, 0},
234
+ /* 23 */ { 4, s_2_23, 22, 1, 0},
235
+ /* 24 */ { 5, s_2_24, 22, 1, 0}
236
+ };
237
+
238
/* Literal strings used by the routines below:
 *   s_0 / s_1  'j' is searched for and replaced by 'i'   (r_map_letters)
 *   s_2 / s_3  'v' is searched for and replaced by 'u'   (r_map_letters)
 *   s_4        the trailing "que" tested by r_que_word
 *   s_5..s_7   replacement endings "i", "bi", "eri" for verb result codes
 *              1, 2 and 3 in latin_UTF_8_stem */
+ static const symbol s_0[] = { 'j' };
239
+ static const symbol s_1[] = { 'i' };
240
+ static const symbol s_2[] = { 'v' };
241
+ static const symbol s_3[] = { 'u' };
242
+ static const symbol s_4[] = { 'q', 'u', 'e' };
243
+ static const symbol s_5[] = { 'i' };
244
+ static const symbol s_6[] = { 'b', 'i' };
245
+ static const symbol s_7[] = { 'e', 'r', 'i' };
246
+
247
/* Rewrites letters in the text between the current cursor and the limit
 * z->l: the first pass replaces every 'j' with 'i', the second every 'v'
 * with 'u'. Each pass scans forward one position at a time, and the cursor
 * is put back to its value on entry when the pass ends.
 * Returns 1 when both passes complete, or the negative value returned by
 * slice_from_s if a replacement fails. */
+ static int r_map_letters(struct SN_env * z) {
248
+ { int c1 = z->c; /* do, line 14 */
249
+ while(1) { /* repeat, line 14 */
250
+ int c2 = z->c;
251
+ while(1) { /* goto, line 14 */
252
+ int c3 = z->c;
253
+ z->bra = z->c; /* [, line 14 */
254
+ if (!(eq_s(z, 1, s_0))) goto lab2; /* no 'j' at this position */
255
+ z->ket = z->c; /* ], line 14 */
256
+ z->c = c3; /* cursor goes back to the start of the match */
257
+ break;
258
+ lab2:
259
+ z->c = c3;
260
+ if (z->c >= z->l) goto lab1; /* limit reached: no further 'j' */
261
+ z->c++; /* goto, line 14 */
262
+ }
263
+ { int ret = slice_from_s(z, 1, s_1); /* <-, line 14 */
264
+ if (ret < 0) return ret;
265
+ }
266
+ continue;
267
+ lab1:
268
+ z->c = c2;
269
+ break;
270
+ }
271
+ z->c = c1;
272
+ }
273
+ { int c4 = z->c; /* do, line 15 */
274
+ while(1) { /* repeat, line 15 */
275
+ int c5 = z->c;
276
+ while(1) { /* goto, line 15 */
277
+ int c6 = z->c;
278
+ z->bra = z->c; /* [, line 15 */
279
+ if (!(eq_s(z, 1, s_2))) goto lab5; /* no 'v' at this position */
280
+ z->ket = z->c; /* ], line 15 */
281
+ z->c = c6; /* cursor goes back to the start of the match */
282
+ break;
283
+ lab5:
284
+ z->c = c6;
285
+ if (z->c >= z->l) goto lab4; /* limit reached: no further 'v' */
286
+ z->c++; /* goto, line 15 */
287
+ }
288
+ { int ret = slice_from_s(z, 1, s_3); /* <-, line 15 */
289
+ if (ret < 0) return ret;
290
+ }
291
+ continue;
292
+ lab4:
293
+ z->c = c5;
294
+ break;
295
+ }
296
+ z->c = c4;
297
+ }
298
+ return 1;
299
+ }
300
+
301
/* Handles a trailing "que" (s_4), working backwards from the cursor.
 *   - Returns 0 immediately when eq_s_b does not match "que".
 *   - When "que" matches, the text before it matches an entry of a_0, and
 *     that match reaches the backward limit z->lb, the current text is
 *     assigned to both z->S[0] (noun_form) and z->S[1] (verb_form) and 1 is
 *     returned.
 *   - Otherwise the bracketed "que" is deleted and 0 is returned (the
 *     Snowball "fail"), so the caller sees the same 0 as for "no que".
 * Returns -1 if assign_to yields 0, or the negative value from slice_del.
 * NOTE(review): a_0 is defined outside this view; presumably it lists words
 * in which "que" is part of the word rather than an enclitic -- confirm. */
+ static int r_que_word(struct SN_env * z) {
302
+ z->ket = z->c; /* [, line 22 */
303
+ if (!(eq_s_b(z, 3, s_4))) return 0;
304
+ z->bra = z->c; /* ], line 22 */
305
+ { int m1 = z->l - z->c; (void)m1; /* or, line 35 */
306
+ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876514 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; /* quick reject: the byte before the cursor must be in 0x60..0x7f with its low five bits set in the mask (bits for a, e, i, m, o, r, s, t) */
307
+ if (!(find_among_b(z, a_0, 54))) goto lab1; /* among, line 23 */
308
+ if (z->c > z->lb) goto lab1; /* atlimit, line 32 */
309
+ z->bra = z->c; /* ], line 32 */
310
+ z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 33 */
311
+ if (z->S[0] == 0) return -1; /* => noun_form, line 33 */
312
+ z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 34 */
313
+ if (z->S[1] == 0) return -1; /* => verb_form, line 34 */
314
+ goto lab0;
315
+ lab1:
316
+ z->c = z->l - m1; /* restore the cursor saved before the a_0 lookup */
317
+ { int ret = slice_del(z); /* delete, line 35 */
318
+ if (ret < 0) return ret;
319
+ }
320
+ return 0; /* fail, line 35 */
321
+ }
322
+ lab0:
323
+ return 1;
324
+ }
325
+
326
/* Entry point of the Latin stemmer.
 * Steps, in order:
 *   1. r_map_letters rewrites j -> i and v -> u.
 *   2. The cursor is switched to backward mode (lb = c, c = l).
 *   3. r_que_word is tried. If it returns non-zero, control jumps to step 6
 *      with S[0]/S[1] as that routine left them.
 *   4. Otherwise the current text is assigned to S[0] (noun_form) and S[1]
 *      (verb_form).
 *   5. Each copy is processed in turn with z->p temporarily pointed at it:
 *      noun_form has an a_1 ending deleted; verb_form has an a_2 ending
 *      deleted or replaced by "i" / "bi" / "eri". In both passes the ending
 *      is only touched if the cursor can hop two positions further back
 *      without crossing z->lb.
 *   6. insert_v is called with the range z->c (= z->lb) to z->l and S[0]
 *      ("= noun_form" in the Snowball source). S[1] is not read again in
 *      this function.
 * Returns 1 on completion, 0 if r_map_letters returns 0, -1 if assign_to
 * yields 0, or the negative value of a failing slice/insert helper.
 * NOTE(review): the `return ret` statements inside the two `$` blocks leave
 * the function before `* z = env` restores the saved environment and while
 * z->p still points at S[0] / S[1] -- confirm this is acceptable on the
 * error path. */
+ extern int latin_UTF_8_stem(struct SN_env * z) {
327
+ int among_var;
328
+ { int ret = r_map_letters(z);
329
+ if (ret == 0) return 0; /* call map_letters, line 41 */
330
+ if (ret < 0) return ret;
331
+ }
332
+ z->lb = z->c; z->c = z->l; /* backwards, line 43 */
333
+
334
+ { int m1 = z->l - z->c; (void)m1; /* or, line 44 */
335
+ { int ret = r_que_word(z);
336
+ if (ret == 0) goto lab1; /* call que_word, line 44 */
337
+ if (ret < 0) return ret;
338
+ }
339
+ goto lab0;
340
+ lab1:
341
+ z->c = z->l - m1;
342
+ z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 45 */
343
+ if (z->S[0] == 0) return -1; /* => noun_form, line 45 */
344
+ z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 46 */
345
+ if (z->S[1] == 0) return -1; /* => verb_form, line 46 */
346
+ { struct SN_env env = * z; /* $ noun_form, line 48 */
347
+ int failure = 1; /* assume failure */
348
+ z->p = z->S[0]; /* operate on the noun_form copy instead of the input */
349
+ z->lb = z->c = 0;
350
+ z->l = SIZE(z->p);
351
+ z->lb = z->c; z->c = z->l; /* backwards, line 48 */
352
+
353
+ { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 48 */
354
+ z->ket = z->c; /* [, line 49 */
355
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3711538 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } /* quick reject on the last byte: mask bits are a, d, e, i, m, o, s, t, u */
356
+ among_var = find_among_b(z, a_1, 19); /* substring, line 49 */
357
+ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; }
358
+ z->bra = z->c; /* ], line 49 */
359
+ { int ret = z->c - 2; /* two positions before the matched ending */
360
+ if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab3; }
361
+ z->c = ret; /* hop, line 49 */
362
+ }
363
+ switch(among_var) {
364
+ case 0: { z->c = z->l - m_keep; goto lab3; }
365
+ case 1:
366
+ { int ret = slice_del(z); /* delete, line 53 */
367
+ if (ret < 0) return ret;
368
+ }
369
+ break;
370
+ }
371
+ lab3:
372
+ ;
373
+ }
374
+ z->c = z->lb;
375
+ failure = 0; /* mark success */
376
+ z->S[0] = z->p; /* store the (possibly changed) buffer pointer back as noun_form */
377
+ * z = env;
378
+ if (failure) return 0; /* NOTE(review): failure was set to 0 just above, so this never returns */
379
+ }
380
+ { struct SN_env env = * z; /* $ verb_form, line 57 */
381
+ int failure = 1; /* assume failure */
382
+ z->p = z->S[1]; /* operate on the verb_form copy instead of the input */
383
+ z->lb = z->c = 0;
384
+ z->l = SIZE(z->p);
385
+ z->lb = z->c; z->c = z->l; /* backwards, line 57 */
386
+
387
+ { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 57 */
388
+ z->ket = z->c; /* [, line 58 */
389
+ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876480 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab5; } /* quick reject on the last byte: mask bits are i, m, o, r, s, t, the final letters of the a_2 entries */
390
+ among_var = find_among_b(z, a_2, 25); /* substring, line 58 */
391
+ if (!(among_var)) { z->c = z->l - m_keep; goto lab5; }
392
+ z->bra = z->c; /* ], line 58 */
393
+ { int ret = z->c - 2; /* two positions before the matched ending */
394
+ if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab5; }
395
+ z->c = ret; /* hop, line 58 */
396
+ }
397
+ switch(among_var) {
398
+ case 0: { z->c = z->l - m_keep; goto lab5; }
399
+ case 1:
400
+ { int ret = slice_from_s(z, 1, s_5); /* <-, line 61 */
401
+ if (ret < 0) return ret;
402
+ }
403
+ break;
404
+ case 2:
405
+ { int ret = slice_from_s(z, 2, s_6); /* <-, line 63 */
406
+ if (ret < 0) return ret;
407
+ }
408
+ break;
409
+ case 3:
410
+ { int ret = slice_from_s(z, 3, s_7); /* <-, line 65 */
411
+ if (ret < 0) return ret;
412
+ }
413
+ break;
414
+ case 4:
415
+ { int ret = slice_del(z); /* delete, line 68 */
416
+ if (ret < 0) return ret;
417
+ }
418
+ break;
419
+ }
420
+ lab5:
421
+ ;
422
+ }
423
+ z->c = z->lb;
424
+ failure = 0; /* mark success */
425
+ z->S[1] = z->p; /* store the (possibly changed) buffer pointer back as verb_form */
426
+ * z = env;
427
+ if (failure) return 0; /* NOTE(review): failure was set to 0 just above, so this never returns */
428
+ }
429
+ }
430
+ lab0:
431
+ z->c = z->lb;
432
+ { int c_keep = z->c;
433
+ int ret = insert_v(z, z->c, z->l, z->S[0]); /* = noun_form, line 74 */
434
+ z->c = c_keep;
435
+ if (ret < 0) return ret;
436
+ }
437
+ return 1;
438
+ }
439
+
440
/* Creates the stemmer environment via SN_create_env(2, 0, 0). The 2 matches
 * the two string slots this stemmer uses, z->S[0] and z->S[1].
 * NOTE(review): the meaning of the two zero arguments is defined by the
 * Snowball runtime, not visible here. */
+ extern struct SN_env * latin_UTF_8_create_env(void) { return SN_create_env(2, 0, 0); }
441
+
442
/* Releases an environment through SN_close_env, passing the same count of 2
 * that latin_UTF_8_create_env gives to SN_create_env. */
+ extern void latin_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 2); }
443
+
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
/* Public declarations for the Latin UTF-8 stemmer, wrapped in extern "C"
 * when compiled as C++. struct SN_env is not declared in these lines, so it
 * must be declared before this header is included. */
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * latin_UTF_8_create_env(void); /* allocate a stemmer environment */
9
+ extern void latin_UTF_8_close_env(struct SN_env * z); /* release an environment from latin_UTF_8_create_env */
10
+
11
+ extern int latin_UTF_8_stem(struct SN_env * z); /* run the stemmer on the environment z */
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -17,6 +17,12 @@ class TestStemmer < Test::Unit::TestCase
17
17
  end
18
18
  end
19
19
 
20
# Smoke test: building a stemmer for "latin" with the ISO_8859_1 encoding must
# not raise. Only construction is checked; no stem output is asserted.
+ def test_latin
21
+ assert_nothing_raised do
22
+ ::Lingua::Stemmer.new :language => "latin", :encoding => "ISO_8859_1"
23
+ end
24
+ end
25
+
20
26
  def test_stem
21
27
  s= ::Lingua::Stemmer.new(:language => "en", :encoding => "UTF_8")
22
28
  assert_equal s.stem("obnoxious"), "obnoxi"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aurelian Oancea
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-11-06 00:00:00 +01:00
13
+ date: 2010-04-12 00:00:00 +02:00
14
14
  default_executable:
15
15
  dependencies: []
16
16
 
@@ -35,6 +35,7 @@ files:
35
35
  - libstemmer_c/Makefile.windows
36
36
  - libstemmer_c/README
37
37
  - libstemmer_c/examples/stemwords.c
38
+ - libstemmer_c/foo.txt
38
39
  - libstemmer_c/include/libstemmer.h
39
40
  - libstemmer_c/libstemmer/libstemmer.c
40
41
  - libstemmer_c/libstemmer/libstemmer_utf8.c
@@ -64,6 +65,8 @@ files:
64
65
  - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
65
66
  - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
66
67
  - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
68
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.c
69
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.h
67
70
  - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
68
71
  - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
69
72
  - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
@@ -94,6 +97,8 @@ files:
94
97
  - libstemmer_c/src_c/stem_UTF_8_hungarian.h
95
98
  - libstemmer_c/src_c/stem_UTF_8_italian.c
96
99
  - libstemmer_c/src_c/stem_UTF_8_italian.h
100
+ - libstemmer_c/src_c/stem_UTF_8_latin.c
101
+ - libstemmer_c/src_c/stem_UTF_8_latin.h
97
102
  - libstemmer_c/src_c/stem_UTF_8_norwegian.c
98
103
  - libstemmer_c/src_c/stem_UTF_8_norwegian.h
99
104
  - libstemmer_c/src_c/stem_UTF_8_porter.c