ots 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/README.md +80 -0
  2. data/dictionaries/bg.xml +101 -0
  3. data/dictionaries/ca.xml +141 -0
  4. data/dictionaries/cs.xml +161 -0
  5. data/dictionaries/cy.xml +118 -0
  6. data/dictionaries/da.xml +129 -0
  7. data/dictionaries/de.xml +354 -0
  8. data/dictionaries/el.xml +80 -0
  9. data/dictionaries/en.xml +606 -0
  10. data/dictionaries/eo.xml +171 -0
  11. data/dictionaries/es.xml +369 -0
  12. data/dictionaries/et.xml +172 -0
  13. data/dictionaries/eu.xml +77 -0
  14. data/dictionaries/fi.xml +105 -0
  15. data/dictionaries/fr.xml +199 -0
  16. data/dictionaries/ga.xml +124 -0
  17. data/dictionaries/gl.xml +290 -0
  18. data/dictionaries/he.xml +334 -0
  19. data/dictionaries/hu.xml +280 -0
  20. data/dictionaries/ia.xml +97 -0
  21. data/dictionaries/id.xml +75 -0
  22. data/dictionaries/is.xml +201 -0
  23. data/dictionaries/it.xml +206 -0
  24. data/dictionaries/lv.xml +77 -0
  25. data/dictionaries/mi.xml +76 -0
  26. data/dictionaries/ms.xml +160 -0
  27. data/dictionaries/mt.xml +73 -0
  28. data/dictionaries/nl.xml +245 -0
  29. data/dictionaries/nn.xml +264 -0
  30. data/dictionaries/pl.xml +92 -0
  31. data/dictionaries/pt.xml +365 -0
  32. data/dictionaries/ro.xml +163 -0
  33. data/dictionaries/ru.xml +150 -0
  34. data/dictionaries/sv.xml +255 -0
  35. data/dictionaries/tl.xml +67 -0
  36. data/dictionaries/tr.xml +65 -0
  37. data/dictionaries/uk.xml +98 -0
  38. data/dictionaries/yi.xml +293 -0
  39. data/ext/article.c +119 -0
  40. data/ext/dictionary.c +335 -0
  41. data/ext/extconf.rb +13 -14
  42. data/ext/grader-tc.c +185 -0
  43. data/ext/grader-tc.h +64 -0
  44. data/ext/grader-tf.c +116 -0
  45. data/ext/grader.c +85 -0
  46. data/ext/highlighter.c +128 -0
  47. data/ext/html.c +131 -0
  48. data/ext/libots.h +158 -0
  49. data/ext/ots.c +130 -151
  50. data/ext/ots.h +15 -0
  51. data/ext/parser.c +173 -0
  52. data/ext/relations.c +163 -0
  53. data/ext/stemmer.c +332 -0
  54. data/ext/text.c +98 -0
  55. data/ext/version.h +2 -0
  56. data/ext/wordlist.c +220 -0
  57. data/test/helper.rb +3 -0
  58. data/test/test_article.rb +52 -0
  59. data/test/test_ots.rb +23 -0
  60. metadata +122 -38
  61. data/README +0 -25
  62. data/VERSION +0 -1
  63. data/lib/ots.rb +0 -1
  64. data/test/ots_test.rb +0 -62
@@ -0,0 +1,15 @@
1
+ #pragma once
2
+
3
+ #include <ruby.h>
4
+ #include <ruby/encoding.h>
5
+
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
8
+ #include <string.h>
9
+
10
+ #include <libots.h>
11
+ #include "version.h"
12
+
13
/* Call the Ruby method `to_s` on v with no arguments. */
#define TO_S(v) rb_funcall((v), rb_intern("to_s"), 0)
/*
 * C character pointer of v.to_s.
 * NOTE(review): the String produced by TO_S is referenced only through the
 * returned pointer - confirm callers copy the bytes before Ruby's GC can run.
 */
#define CSTRING(v) RSTRING_PTR(TO_S(v))
/*
 * Build an encoded Ruby string from a NUL-terminated C string.
 * `text` is expanded twice (once for the data, once for strlen), so the
 * argument must be free of side effects.
 */
#define rb_enc_str_new2(text, enc) rb_enc_str_new((text), strlen(text), (enc))
@@ -0,0 +1,173 @@
1
+ /*
2
+ * parser.c
3
+ *
4
+ * Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
5
+ *
6
+ * This program is free software; you can redistribute it and/or modify
7
+ * it under the terms of the GNU General Public License as published by
8
+ * the Free Software Foundation; either version 2 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ * GNU Library General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU General Public License
17
+ * along with this program; if not, write to the Free Software
18
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
+ */
20
+
21
+ #include <stdio.h>
22
+ #include <stdlib.h>
23
+ #include <string.h>
24
+ #include <strings.h>
25
+ #include "libots.h"
26
+
27
+ #define BUFFER_SIZE (1024*8)
28
+
29
/*
 * Report whether aWord ends with the suffix post.
 *
 * Returns 1 when the last strlen(post) bytes of aWord are equal to post
 * (an empty post therefore always matches) and 0 otherwise, including
 * when post is longer than aWord.
 */
int
ots_match_post (const char *aWord,const char *post)
{
  int word_len = strlen (aWord);
  int post_len = strlen (post);
  const char *tail;

  if (post_len > word_len)
    return 0;

  /* compare the trailing post_len bytes of the word against the suffix */
  tail = aWord + (word_len - post_len);
  return (memcmp (tail, post, post_len) == 0) ? 1 : 0;
}
46
+
47
+ void
48
+ ots_parse_file (FILE * stream, OtsArticle * Doc )
49
+ {
50
+ unsigned char fread_buffer[BUFFER_SIZE];
51
+ unsigned char *buffer;
52
+ size_t nread, total_read, avail_size;
53
+
54
+ buffer = g_new0 (unsigned char, BUFFER_SIZE);
55
+
56
+ avail_size = BUFFER_SIZE;
57
+ total_read = nread = 0;
58
+ while ((nread =
59
+ fread (fread_buffer, sizeof (unsigned char), sizeof (fread_buffer),
60
+ stream)) > 0)
61
+ {
62
+ if (nread + total_read > avail_size)
63
+ {
64
+ avail_size *= 2;
65
+ buffer = g_renew (unsigned char, buffer, avail_size);
66
+ }
67
+
68
+ strncpy (buffer + total_read, fread_buffer, nread);
69
+ total_read += nread;
70
+ }
71
+
72
+ ots_parse_stream (buffer, total_read, Doc);
73
+ g_free (buffer);
74
+ }
75
+
76
+
77
+
78
+
79
+
80
+ int
81
+ ots_parser_should_break(const char *aWord,const OtsStemRule * rule)
82
+ {
83
+ GList *li;
84
+ char *postfix;
85
+ int toBreak=0;
86
+
87
+ for (li = (GList *) rule->ParserBreak; li != NULL; li = li->next)
88
+ {
89
+ postfix=li->data;
90
+ if (ots_match_post (aWord, postfix) )
91
+ {
92
+ toBreak=1;
93
+ break;
94
+ }
95
+
96
+ }
97
+
98
+
99
+ for (li = (GList *) rule->ParserDontBreak; li != NULL; li = li->next)
100
+ {
101
+ postfix=li->data;
102
+ if (ots_match_post (aWord, postfix) )
103
+ {
104
+ toBreak=0;
105
+ break;
106
+ }
107
+
108
+ }
109
+ return toBreak;
110
+ }
111
+
112
+
113
+
114
+ void
115
+ ots_parse_stream(const unsigned char *utf8, size_t len, OtsArticle * Doc) /*parse the unicode stream */
116
+ {
117
+
118
+ OtsSentence *tmpLine = ots_append_line (Doc);
119
+ OtsStemRule * rule=Doc->stem;
120
+ gunichar uc;
121
+ int index = 0;
122
+ char *s = (char *) utf8;
123
+ GString *word_buffer = g_string_new (NULL);
124
+
125
+
126
+ while ((*s) && (index < len))
127
+ {
128
+ uc = g_utf8_get_char (s);
129
+
130
+ if (!g_unichar_isspace (uc)) /* space is the end of a word */
131
+ {
132
+
133
+ g_string_append_unichar(word_buffer,uc);
134
+
135
+ }
136
+ else
137
+ {
138
+
139
+ if (0<word_buffer->len)
140
+ {
141
+ ots_append_word (tmpLine, word_buffer->str);
142
+
143
+ if (ots_parser_should_break(word_buffer->str,rule)) {
144
+ tmpLine = ots_append_line (Doc); /* Add a new Line */
145
+ }
146
+
147
+ g_string_assign (word_buffer, "");
148
+
149
+ }
150
+
151
+ if (uc=='\n') {ots_append_word (tmpLine,"\n");}
152
+ else
153
+ {ots_append_word (tmpLine," ");}
154
+
155
+ g_string_assign (word_buffer,"");
156
+ }
157
+
158
+ s = g_utf8_next_char (s);
159
+
160
+ index++;
161
+ }
162
+
163
+
164
+ if (0<word_buffer->len) /*final flush*/
165
+ {
166
+ ots_append_word (tmpLine, word_buffer->str);
167
+ g_string_assign (word_buffer, "");
168
+ }
169
+
170
+
171
+
172
+ g_string_free (word_buffer, TRUE);
173
+ }
@@ -0,0 +1,163 @@
1
+ /*
2
+ * relations.c
3
+ *
4
+ * Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
5
+ *
6
+ * This program is free software; you can redistribute it and/or modify
7
+ * it under the terms of the GNU General Public License as published by
8
+ * the Free Software Foundation; either version 2 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ * GNU Library General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU General Public License
17
+ * along with this program; if not, write to the Free Software
18
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
+ */
20
+
21
+ #include <stdio.h>
22
+ #include <stdlib.h>
23
+ #include <string.h>
24
+ #include "grader-tc.h"
25
+
26
+ #include "libots.h"
27
+ /*
28
+ The Inner product of two texts is defined as the number of topics they
29
+ share. This set of functions implements this relation using the ots
30
+ api.
31
+
32
+ Application: a relation between a slashdot article and a comment made
33
+ usage: ots_text_relations(story,"en",comment,"en",n);
34
+ where n is the max number of most important topics to consider; safe to give a high number (ex: 20);
35
+
36
+ returns:
37
+ 0 - off topic
38
+ n - number of topics they share
39
+
40
+ */
41
+
42
+ #define OTS_MAX_TOPIC_WORD_SIZE 256
43
+
44
+ /*Returns the number of topics that two blocks of text share*/
45
+ int ots_text_relations(
46
+ const unsigned char *text1,const unsigned char *lang_code1,
47
+ const unsigned char *text2,const unsigned char *lang_code2,const int topic_num)
48
+ {
49
+ GList* top1;
50
+ GList* top2;
51
+ int score;
52
+
53
+ top1=ots_text_stem_list(text1,lang_code1,topic_num);
54
+ top2=ots_text_stem_list(text2,lang_code2,topic_num);
55
+
56
+ score=ots_topic_list_score(top1,top2);
57
+
58
+ if (top1){g_list_foreach (top1, (GFunc) g_free, NULL);g_list_free (top1);}
59
+ if (top2){g_list_foreach (top2, (GFunc) g_free, NULL);g_list_free (top2);}
60
+
61
+ return score;
62
+ }
63
+
64
+
65
+
66
+
67
+ /*For a given text, return the list of the topics*/
68
+ char* ots_text_topics(
69
+ const unsigned char *text,const unsigned char *lang_code,int topic_num)
70
+ {
71
+ int i;
72
+ GString *word;
73
+ unsigned char *str;
74
+ unsigned char *tmp;
75
+ OtsArticle *Art;
76
+
77
+ if (NULL==text) return NULL;
78
+ word = g_string_new (NULL);
79
+
80
+ Art = ots_new_article ();
81
+
82
+ ots_load_xml_dictionary(Art,lang_code); /*Load the dictionary*/
83
+ if (text!=NULL) ots_parse_stream (text,strlen(text), Art); /* read text , put it in struct Article */
84
+ ots_grade_doc (Art);
85
+
86
+
87
+ for (i=0;i<=topic_num;i++)
88
+ {
89
+ tmp=ots_word_in_list(Art->ImpWords,i);
90
+ if ((tmp!=NULL)&&(strlen(tmp)>0)) {g_string_append(word,tmp);
91
+ g_string_append(word," "); }
92
+ }
93
+
94
+
95
+ str=word->str;
96
+ g_string_free (word, FALSE);
97
+ ots_free_article (Art);
98
+
99
+ return str;
100
+ }
101
+
102
+
103
+
104
+ /*For a given text, return the list of the stemmed topics*/
105
+ GList* ots_text_stem_list(const unsigned char *text, const unsigned char *lang_code, int topic_num)
106
+ {
107
+ int i;
108
+ GList *topics=NULL;
109
+ unsigned char *tmp;
110
+ OtsArticle *Art;
111
+
112
+ if (NULL==text) return NULL;
113
+
114
+ Art = ots_new_article ();
115
+
116
+ ots_load_xml_dictionary(Art,lang_code);
117
+ if (text!=NULL) ots_parse_stream (text,strlen(text), Art);
118
+ ots_grade_doc (Art);
119
+
120
+
121
+ for (i=0;i<=topic_num;i++)
122
+ {
123
+ tmp=ots_stem_in_list(Art->ImpWords,i);
124
+ if ((tmp)&&(strlen(tmp)>0))
125
+ topics=g_list_append(topics,g_strdup(tmp));
126
+ }
127
+
128
+
129
+ ots_free_article (Art);
130
+ return topics;
131
+ }
132
+
133
+ /*Gives a score on the relations between two lists of topics; simmilar to the inner product*/
134
+ int ots_topic_list_score(
135
+ const GList *topic_list1,
136
+ const GList *topic_list2)
137
+ {
138
+ int count=0;
139
+ GList *tmplist1;
140
+ GList *tmplist2;
141
+
142
+ if (!(topic_list1)) return 0;
143
+ if (!(topic_list2)) return 0;
144
+
145
+ tmplist1 = g_list_first(topic_list1);
146
+ while(tmplist1)
147
+ {
148
+ tmplist2 = g_list_first(topic_list2);
149
+ while(tmplist2)
150
+ {
151
+
152
+ if ((tmplist1->data)&&(tmplist2->data)&&(strlen(tmplist2->data)>1))
153
+ if (0==strncmp(tmplist1->data,tmplist2->data,OTS_MAX_TOPIC_WORD_SIZE))
154
+ {count++;}
155
+
156
+ tmplist2 = g_list_next(tmplist2);
157
+ }
158
+ tmplist1 = g_list_next(tmplist1);
159
+ }
160
+
161
+ return count;
162
+ }
163
+
@@ -0,0 +1,332 @@
1
+ /*
2
+ * stemmer.c
3
+ *
4
+ * Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
5
+ *
6
+ * This program is free software; you can redistribute it and/or modify
7
+ * it under the terms of the GNU General Public License as published by
8
+ * the Free Software Foundation; either version 2 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ * GNU Library General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU General Public License
17
+ * along with this program; if not, write to the Free Software
18
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
+ */
20
+
21
+
22
+ #include <stdio.h>
23
+ #include <stdlib.h>
24
+ #include <string.h>
25
+ #include "libots.h"
26
+
27
+ #define MAX_PREFIX_SIZE 256
28
+
29
+ OtsStemRule *
30
+ new_stem_rule ()
31
+ {
32
+ OtsStemRule *rule = g_new0 (OtsStemRule, 1);
33
+ return rule;
34
+ }
35
+
36
+ void
37
+ free_stem_rule (OtsStemRule *rule)
38
+ {
39
+
40
+ if (rule != NULL)
41
+ {
42
+ g_list_foreach (rule->RemovePre, (GFunc) g_free, NULL);
43
+ g_list_free (rule->RemovePre);
44
+ g_list_foreach (rule->RemovePost, (GFunc) g_free, NULL);
45
+ g_list_free (rule->RemovePost);
46
+
47
+ g_list_foreach (rule->step1_pre, (GFunc) g_free, NULL);
48
+ g_list_free (rule->step1_pre);
49
+ g_list_foreach (rule->step1_post, (GFunc) g_free, NULL);
50
+ g_list_free (rule->step1_post);
51
+
52
+ g_list_foreach (rule->synonyms, (GFunc) g_free, NULL);
53
+ g_list_free (rule->synonyms);
54
+ g_list_foreach (rule->manual, (GFunc) g_free, NULL);
55
+ g_list_free (rule->manual);
56
+
57
+ g_list_foreach (rule->ParserBreak, (GFunc) g_free, NULL);
58
+ g_list_free (rule->ParserBreak);
59
+ g_list_foreach (rule->ParserDontBreak, (GFunc) g_free, NULL);
60
+ g_list_free (rule->ParserDontBreak);
61
+
62
+ g_list_foreach (rule->ReplaceChars, (GFunc) g_free, NULL);
63
+ g_list_free (rule->ReplaceChars);
64
+
65
+ g_free (rule);
66
+ }
67
+ return;
68
+ }
69
+
70
+ static void
71
+ ots_stem_break (unsigned const char *comp,unsigned char *part_a,unsigned char *part_b) /*given already alocated part_a and b */
72
+ { /*example "red|blue" */
73
+ int i, j, clen;
74
+ i = 0;
75
+ j = 0;
76
+
77
+ if (comp==NULL) return;
78
+ if (part_a==NULL) return;
79
+ if (part_b==NULL) return;
80
+
81
+ clen = strlen (comp);
82
+
83
+
84
+ part_a[0] = 0;
85
+ part_b[0] = 0;
86
+
87
+ while ((i < clen) && (i < MAX_PREFIX_SIZE) && (comp[i] != '|'))
88
+ {
89
+ part_a[i] = comp[i];
90
+ i++;
91
+ }
92
+ part_a[i] = 0;
93
+
94
+ i++; /*skip the | mark */
95
+ while (i < clen && (j < MAX_PREFIX_SIZE))
96
+ {
97
+ part_b[j] = comp[i];
98
+ i++;
99
+ j++;
100
+ }
101
+ part_b[j] = 0;
102
+ return;
103
+ }
104
+
105
+
106
+ static unsigned char *
107
+ ots_stem_remove_pre (unsigned const char *aWord,unsigned const char *pre,unsigned const char *new)
108
+ {
109
+ int i, plen, wlen, nlen;
110
+ unsigned char *new_str = NULL;
111
+
112
+ if (aWord==NULL) return NULL;
113
+
114
+ plen = strlen (pre);
115
+ wlen = strlen (aWord);
116
+ nlen = strlen (new);
117
+
118
+ for (i = 0; i < plen; i++)
119
+ if (aWord[i] != pre[i])
120
+ return NULL; /*no match */
121
+
122
+ new_str = g_new0 (char, wlen + nlen +5);
123
+ for (i = 0; i <= nlen; i++)
124
+ new_str[i] = new[i];
125
+
126
+ for (i = nlen; i <= nlen + wlen - plen; i++)
127
+ new_str[i] = aWord[i + plen - nlen];
128
+
129
+ new_str[i + 1] = 0;
130
+ return new_str;
131
+ }
132
+
133
+
134
+
135
+ static unsigned char *
136
+ ots_stem_remove_post (unsigned const char *aWord,unsigned const char *post,unsigned const char *new)
137
+ {
138
+ unsigned int i, wlen, plen, nlen;
139
+ unsigned char *new_str = NULL;
140
+
141
+ if ((NULL==aWord)||(NULL==post)||(NULL==new)) return NULL;
142
+
143
+ wlen = strlen (aWord);
144
+ plen = strlen (post);
145
+ nlen = strlen (new);
146
+
147
+ if (plen>wlen) return NULL;
148
+
149
+
150
+ for (i = 0; i < plen; i++)
151
+ if (aWord[wlen - plen + i]!= post[i])
152
+ return NULL; /* no match */
153
+
154
+ new_str = g_new0 (char, wlen + nlen +5);
155
+
156
+ for (i = 0; i <= wlen - plen; i++) /*place word */
157
+ new_str[i] = aWord[i];
158
+
159
+ for (i = 0; i <= nlen; i++) /*place newfix */
160
+ new_str[wlen - plen + i] = new[i];
161
+
162
+ return new_str; /*word replaced */
163
+ }
164
+
165
+
166
+
167
/*
 * Whole-word replacement: when aWord is exactly equal to `old`, return a
 * g_strdup() copy of `new`; otherwise (or when aWord is NULL) return NULL.
 */
static unsigned char *
ots_stem_replace_word (unsigned const char *aWord,unsigned const char *old,unsigned const char *new)
{
  if (aWord == NULL)
    return NULL;

  if (strcmp (aWord, old) != 0)
    return NULL;

  return g_strdup (new);
}
183
+
184
+
185
+
186
+
187
+
188
+ unsigned char *
189
+ ots_stem_format (unsigned const char *aWord, const OtsStemRule * rule)
190
+ {
191
+ GList *li;
192
+ unsigned char *rep = NULL;
193
+ unsigned char *normWord = NULL;
194
+
195
+ if (aWord==NULL) return NULL;
196
+
197
+ normWord = g_utf8_strdown (aWord, -1); /*lowercase the word */
198
+
199
+ char *prefix;
200
+ char *newfix;
201
+
202
+ prefix = g_new0 (char, MAX_PREFIX_SIZE);
203
+ newfix = g_new0 (char, MAX_PREFIX_SIZE);
204
+
205
+ for (li = (GList *) rule->step1_pre; li != NULL; li = li->next)
206
+ {
207
+ ots_stem_break (li->data, prefix, newfix);
208
+ rep = ots_stem_remove_pre (normWord, prefix, newfix);
209
+ if (NULL != rep)
210
+ {
211
+ g_free (normWord);
212
+ normWord = rep;
213
+ rep = NULL;
214
+ }
215
+ }
216
+
217
+
218
+ for (li = (GList *) rule->step1_post; li != NULL; li = li->next)
219
+ {
220
+ ots_stem_break (li->data, prefix, newfix);
221
+ rep = ots_stem_remove_post(normWord, prefix, newfix);
222
+ if (NULL != rep)
223
+ {
224
+ g_free (normWord);
225
+ normWord = rep;
226
+ rep = NULL;
227
+ }
228
+ }
229
+
230
+ g_free (prefix);
231
+ g_free (newfix);
232
+
233
+ return normWord;
234
+ }
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+ unsigned char *
244
+ ots_stem_strip (unsigned const char *aWord,const OtsStemRule * rule)
245
+ {
246
+ GList *li;
247
+ unsigned char *rep = NULL;
248
+
249
+ unsigned char *prefix;
250
+ unsigned char *newfix;
251
+ unsigned char *normWord=NULL;
252
+
253
+ prefix = g_new0 (char, MAX_PREFIX_SIZE);
254
+ newfix = g_new0 (char, MAX_PREFIX_SIZE);
255
+
256
+ if (aWord==NULL) return NULL;
257
+
258
+ normWord = ots_stem_format (aWord,rule);
259
+
260
+
261
+ for (li = (GList *) rule->manual; li != NULL; li = li->next)
262
+ {
263
+ ots_stem_break (li->data, prefix, newfix);
264
+ rep = ots_stem_replace_word (normWord, prefix, newfix);
265
+ if (NULL != rep)
266
+ {
267
+ g_free (normWord);
268
+ normWord = rep;
269
+ rep = NULL;
270
+ break;
271
+ }
272
+ }
273
+
274
+
275
+
276
+
277
+ for (li = (GList *) rule->RemovePre; li != NULL; li = li->next)
278
+ {
279
+ ots_stem_break (li->data, prefix, newfix);
280
+ rep = ots_stem_remove_pre (normWord, prefix, newfix);
281
+ if (NULL != rep)
282
+ {
283
+ g_free (normWord);
284
+ normWord = rep;
285
+ rep = NULL;
286
+ break;
287
+ }
288
+ }
289
+
290
+
291
+ for (li = (GList *) rule->RemovePost; li != NULL; li = li->next)
292
+ {
293
+ ots_stem_break (li->data, prefix, newfix);
294
+ rep = ots_stem_remove_post (normWord, prefix, newfix);
295
+ if (NULL != rep)
296
+ {
297
+ g_free (normWord);
298
+ normWord = rep;
299
+ rep = NULL;
300
+ break;
301
+ }
302
+
303
+ }
304
+
305
+
306
+ for (li = (GList *) rule->synonyms; li != NULL; li = li->next)
307
+ {
308
+ ots_stem_break (li->data, prefix, newfix);
309
+ rep = ots_stem_replace_word (normWord, prefix, newfix);
310
+ if (NULL != rep)
311
+ {
312
+ g_free (normWord);
313
+ normWord = rep;
314
+ rep = NULL;
315
+ break;
316
+ }
317
+ }
318
+
319
+
320
+ g_free (prefix);
321
+ g_free (newfix);
322
+
323
+
324
+ if (strlen(normWord)<3) /*stem is two letter long. thats not right. N(eed)==N(ation) ?*/
325
+ {
326
+ g_free(normWord);
327
+ normWord = ots_stem_format (aWord,rule); /*lowercase the word */
328
+ }
329
+
330
+
331
+ return normWord;
332
+ }