ruby-sfst 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,94 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE interface.h */
4
+ /* MODULE interface */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /*******************************************************************/
9
+
10
+ #ifndef _INTERFACE_H_
11
+ #define _INTERFACE_H_
12
+
13
+ #include "utf8.h"
14
+ #include "fst.h"
15
+
16
/* Selects the kind of two-level rule; passed to restriction() and make_rule(). */
typedef enum {
  twol_left  = 0,
  twol_right = 1,
  twol_both  = 2
} Twol_Type;
17
+
18
/* Selects the kind of replace operation; passed to replace() and
   replace_in_context(). */
typedef enum {
  repl_left  = 0,
  repl_right = 1,
  repl_up    = 2,
  repl_down  = 3
} Repl_Type;
19
+
20
+ typedef struct range_t {
21
+ Character character;
22
+ struct range_t *next;
23
+ } Range;
24
+
25
+ typedef struct ranges_t {
26
+ Range *range;
27
+ struct ranges_t *next;
28
+ } Ranges;
29
+
30
+
31
+ typedef struct contexts_t {
32
+ Transducer *left, *right;
33
+ struct contexts_t *next;
34
+ } Contexts;
35
+
36
+
37
+ extern bool Verbose;
38
+ extern bool UTF8;
39
+ extern char *FileName;
40
+ extern Alphabet TheAlphabet;
41
+
42
+ void error2( char *message, char *input );
43
+ Transducer *new_transducer( Range*, Range* );
44
+ Transducer *read_words( char *filename );
45
+ Transducer *read_transducer( char *filename );
46
+ Transducer *var_value( char *name );
47
+ Transducer *rvar_value( char *name );
48
+ Range *svar_value( char *name );
49
+ Range *complement_range( Range* );
50
+ Range *rsvar_value( char *name );
51
+ Character character_code( unsigned int uc );
52
+ Character symbol_code( char *s );
53
+
54
+ Range *add_value( Character, Range*);
55
+ Range *add_var_values( char *name, Range*);
56
+ Range *add_values( unsigned int, unsigned int, Range*);
57
+ Range *append_values( Range *r2, Range *r );
58
+ void add_alphabet( Transducer* );
59
+ void store_transducer( Transducer *a, char *filename );
60
+
61
+ // These functions delete their argument automata
62
+
63
+ void def_alphabet( Transducer *a );
64
+ bool def_var( char *name, Transducer *a );
65
+ bool def_rvar( char *name, Transducer *a );
66
+ bool def_svar( char *name, Range *r );
67
+ Transducer *explode( Transducer *a );
68
+ Transducer *catenate( Transducer *a1, Transducer *a2 );
69
+ Transducer *disjunction( Transducer *a1, Transducer *a2 );
70
+ Transducer *conjunction( Transducer *a1, Transducer *a2 );
71
+ Transducer *subtraction( Transducer *a1, Transducer *a2 );
72
+ Transducer *composition( Transducer *a1, Transducer *a2 );
73
+ Transducer *restriction( Transducer *a, Twol_Type type, Contexts *c, int );
74
+ Transducer *replace( Transducer *a, Repl_Type type, bool optional );
75
+ Transducer *replace_in_context( Transducer *a, Repl_Type type, Contexts *c, bool optional );
76
+ Transducer *negation( Transducer *a );
77
+ Transducer *upper_level( Transducer *a );
78
+ Transducer *lower_level( Transducer *a );
79
+ Transducer *minimise( Transducer *a );
80
+ Transducer *switch_levels( Transducer *a );
81
+ Transducer *repetition( Transducer *a );
82
+ Transducer *repetition2( Transducer *a );
83
+ Transducer *optional( Transducer *a );
84
+ Transducer *make_rule( Transducer *lc, Range *r1, Twol_Type type,
85
+ Range *r2, Transducer *rc );
86
+ Transducer *freely_insert( Transducer *a, Character c, Character c );
87
+ Transducer *make_mapping( Ranges*, Ranges* );
88
+ Ranges *add_range( Range*, Ranges* );
89
+ Contexts *make_context( Transducer *l, Transducer *r );
90
+ Contexts *add_context( Contexts *nc, Contexts *c );
91
+ Transducer *result( Transducer*, bool );
92
+ void write_to_file( Transducer*, char *filename);
93
+
94
+ #endif
@@ -0,0 +1,328 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE make-compact.C */
4
+ /* MODULE make-compact */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE Code needed for generating compact automata */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #include <math.h>
13
+
14
+ #include "make-compact.h"
15
+
16
+ using std::equal_range;
17
+ using std::sort;
18
+ using std::cerr;
19
+ using __gnu_cxx::hash_map;
20
+
21
+ class ARC {
22
+ public:
23
+ int cv;
24
+ Label label;
25
+ unsigned int target_node;
26
+
27
+ bool operator< ( const ARC a ) const {
28
+ return cv < a.cv;
29
+ };
30
+ };
31
+
32
// Hash table from a Label to a size_t number.  store_labels() fills it with
// consecutive numbers in alphabet iteration order and uses it to translate
// every arc label into the number that is written to the file.
+ typedef hash_map<Label, size_t, Label::label_hash, Label::label_eq> LabelNumber;
33
+
34
+
35
+ /*******************************************************************/
36
+ /* */
37
+ /* MakeCompactTransducer::sort */
38
+ /* */
39
+ /*******************************************************************/
40
+
41
+ void MakeCompactTransducer::sort( Level level )
42
+
43
+ {
44
+ for( unsigned int n=0; n<number_of_nodes; n++) {
45
+ unsigned int from=first_arc[n];
46
+ unsigned int to=first_arc[n+1];
47
+ int l=to-from;
48
+
49
+ // copy the arcs to a temporary table
50
+ ARC *arc=new ARC[l];
51
+ for( unsigned int i=from; i<to; i++) {
52
+ arc[i-from].cv = (int)label[i].get_char(level);
53
+ // make sure that epsilon arcs are stored at the beginning
54
+ // even if epsilon is not 0
55
+ if (arc[i-from].cv == (int)Label::epsilon)
56
+ arc[i-from].cv = -1;
57
+ arc[i-from].label = label[i];
58
+ arc[i-from].target_node = target_node[i];
59
+ }
60
+
61
+ // sort the table
62
+ ::sort( arc, arc+l );
63
+
64
+ // copy the arcs back to the original table
65
+ for( unsigned int i=from; i<to; i++) {
66
+ label[i] = arc[i-from].label;
67
+ target_node[i] = arc[i-from].target_node;
68
+ }
69
+
70
+ delete[] arc;
71
+ }
72
+ }
73
+
74
+
75
+ /*******************************************************************/
76
+ /* */
77
+ /* MakeCompactTransducer::count_arcs */
78
+ /* */
79
+ /*******************************************************************/
80
+
81
+ void MakeCompactTransducer::count_arcs( Node *node, NodeNumbering &index,
82
+ long vmark )
83
+ {
84
+ if (!node->was_visited( vmark )) {
85
+ unsigned int n = index[node];
86
+ finalp[n] = node->is_final();
87
+ first_arc[n] = 0;
88
+ Arcs *arcs=node->arcs();
89
+ for( ArcsIter p(arcs); p; p++ ) {
90
+ Arc *arc=p;
91
+ first_arc[n]++;
92
+ count_arcs(arc->target_node(), index, vmark);
93
+ }
94
+ }
95
+ }
96
+
97
+
98
+ /*******************************************************************/
99
+ /* */
100
+ /* MakeCompactTransducer::store_arcs */
101
+ /* */
102
+ /*******************************************************************/
103
+
104
+ void MakeCompactTransducer::store_arcs( Node *node, NodeNumbering &index,
105
+ long vmark )
106
+ {
107
+ if (!node->was_visited( vmark )) {
108
+ unsigned int n=first_arc[index[node]];
109
+ Arcs *arcs=node->arcs();
110
+ for( ArcsIter p(arcs); p; p++ ) {
111
+ Arc *arc=p;
112
+ label[n] = arc->label();
113
+ target_node[n++] = index[arc->target_node()];
114
+ store_arcs(arc->target_node(), index, vmark);
115
+ }
116
+ }
117
+ }
118
+
119
+
120
+ /*******************************************************************/
121
+ /* */
122
+ /* MakeCompactTransducer::MakeCompactTransducer */
123
+ /* */
124
+ /*******************************************************************/
125
+
126
+ MakeCompactTransducer::MakeCompactTransducer( Transducer &a, Level l )
127
+
128
+ {
129
+ if (a.is_infinitely_ambiguous()) {
130
+ cerr << "Error: resulting transducer contains an infinite loop!\n";
131
+ exit(1);
132
+ }
133
+
134
+ NodeNumbering index(a);
135
+
136
+ alphabet.copy(a.alphabet);
137
+
138
+ // memory allocation
139
+ number_of_nodes = index.number_of_nodes();
140
+ finalp = new char[number_of_nodes];
141
+ first_arc = new unsigned int[number_of_nodes+1];
142
+
143
+ // count the number of outgoing arcs for each node
144
+ // and store them in first_arc[]
145
+ a.incr_vmark();
146
+ count_arcs( a.root_node(), index, a.vmark );
147
+ for( int n=number_of_nodes; n>0; n-- )
148
+ first_arc[n] = first_arc[n-1];
149
+ first_arc[0] = 0;
150
+ for( unsigned int n=0; n<number_of_nodes; n++ )
151
+ first_arc[n+1] += first_arc[n];
152
+ number_of_arcs = first_arc[number_of_nodes];
153
+
154
+ // memory allocation
155
+ label = new Label[number_of_arcs];
156
+ target_node = new unsigned int[number_of_arcs];
157
+
158
+ // store the arcs
159
+ a.incr_vmark();
160
+ store_arcs( a.root_node(), index, a.vmark );
161
+
162
+ // sort the arcs
163
+ sort( l );
164
+ }
165
+
166
+
167
+ /*******************************************************************/
168
+ /* */
169
+ /* MakeCompactTransducer::store_finalp */
170
+ /* */
171
+ /*******************************************************************/
172
+
173
+ void MakeCompactTransducer::store_finalp( FILE *file )
174
+
175
+ {
176
+ int k=0;
177
+ unsigned char n=0;
178
+
179
+ for( size_t i=0; i<number_of_nodes; i++ ) {
180
+ n = n << 1;
181
+ if (finalp[i])
182
+ n |= 1;
183
+ if (++k == 8) {
184
+ fputc(n, file);
185
+ n = 0;
186
+ k = 0;
187
+ }
188
+ }
189
+ if (k > 0) {
190
+ n <<= 8-k;
191
+ fputc(n, file);
192
+ }
193
+ }
194
+
195
+
196
+ /*******************************************************************/
197
+ /* */
198
+ /* MakeCompactTransducer::store_first_arcs */
199
+ /* */
200
+ /* The data is encoded with the minimal number of bits needed. */
201
+ /* */
202
+ /*******************************************************************/
203
+
204
+ void MakeCompactTransducer::store_first_arcs( FILE *file )
205
+
206
+ {
207
+ int k=0;
208
+ unsigned int n=0;
209
+ // compute number of bits required for storing each item
210
+ size_t bits=(size_t)ceil(log(number_of_arcs+1)/log(2));
211
+
212
+ for( size_t i=0; i<=number_of_nodes; i++ ) {
213
+ unsigned int m=first_arc[i];
214
+ m <<= (sizeof(n)*8) - bits;
215
+ m >>= k;
216
+ n = n | m;
217
+ k += bits;
218
+ if (k >= (int)sizeof(n)*8) {
219
+ fwrite(&n, sizeof(n), 1, file);
220
+ k -= sizeof(n) * 8;
221
+ n = first_arc[i];
222
+ if (k == 0)
223
+ n = 0;
224
+ else
225
+ n = first_arc[i] << (sizeof(n) * 8 - k);
226
+ }
227
+ }
228
+ if (k > 0)
229
+ fwrite(&n, sizeof(n), 1, file);
230
+ }
231
+
232
+
233
+ /*******************************************************************/
234
+ /* */
235
+ /* MakeCompactTransducer::store_target_nodes */
236
+ /* */
237
+ /*******************************************************************/
238
+
239
+ void MakeCompactTransducer::store_target_nodes( FILE *file )
240
+
241
+ {
242
+ int k=0;
243
+ unsigned int n=0;
244
+ size_t bits=(size_t)ceil(log(number_of_nodes)/log(2));
245
+
246
+ for( size_t i=0; i<number_of_arcs; i++ ) {
247
+ unsigned int m=target_node[i];
248
+ m <<= (sizeof(n)*8) - bits;
249
+ m >>= k;
250
+ n = n | m;
251
+ k += bits;
252
+ if (k >= (int)sizeof(n)*8) {
253
+ fwrite(&n, sizeof(n), 1, file);
254
+ k -= sizeof(n)*8;
255
+ if (k == 0)
256
+ n = 0;
257
+ else
258
+ n = target_node[i] << (sizeof(n) * 8 - k);
259
+ }
260
+ }
261
+ if (k > 0)
262
+ fwrite(&n, sizeof(n), 1, file);
263
+ }
264
+
265
+
266
+ /*******************************************************************/
267
+ /* */
268
+ /* MakeCompactTransducer::store_labels */
269
+ /* */
270
+ /*******************************************************************/
271
+
272
+ void MakeCompactTransducer::store_labels( FILE *file )
273
+
274
+ {
275
+ size_t N=0;
276
+ LabelNumber LNum;
277
+ for( Alphabet::const_iterator it=alphabet.begin();
278
+ it != alphabet.end(); it++ )
279
+ {
280
+ Label l=*it;
281
+ LNum[l] = N++;
282
+ }
283
+
284
+ int k=0;
285
+ unsigned int n=0;
286
+ size_t bits=(size_t)ceil(log(alphabet.size())/log(2));
287
+
288
+ for( size_t i=0; i<number_of_arcs; i++ ) {
289
+ unsigned int l = LNum[label[i]];
290
+ unsigned int m=l;
291
+ m = m << (sizeof(n)*8) - bits;
292
+ m = m >> k;
293
+ n = n | m;
294
+ k += bits;
295
+ if (k >= (int)sizeof(n)*8) {
296
+ fwrite(&n, sizeof(n), 1, file);
297
+ k -= sizeof(n)*8;
298
+ if (k == 0)
299
+ n = 0;
300
+ else
301
+ n = l << (sizeof(n) * 8 - k);
302
+ }
303
+ }
304
+ if (k > 0)
305
+ fwrite(&n, sizeof(n), 1, file);
306
+ }
307
+
308
+
309
+ /*******************************************************************/
310
+ /* */
311
+ /* MakeCompactTransducer::store */
312
+ /* */
313
+ /*******************************************************************/
314
+
315
+ void MakeCompactTransducer::store( FILE *file )
316
+
317
+ {
318
+ fputc('c',file);
319
+ alphabet.store(file);
320
+ fwrite(&number_of_nodes, sizeof(number_of_nodes), 1, file);
321
+ fwrite(&number_of_arcs, sizeof(number_of_arcs), 1, file);
322
+ store_finalp(file);
323
+ store_first_arcs(file);
324
+ store_labels(file);
325
+ store_target_nodes(file);
326
+ if (ferror(file))
327
+ throw "Error encountered while writing transducer to file\n";
328
+ }
@@ -0,0 +1,34 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE make-compact.h */
4
+ /* MODULE make-compact */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /*******************************************************************/
9
+
10
+ #ifndef _MAKE_COMPACT_H_
11
+ #define _MAKE_COMPACT_H_
12
+
13
+ #include "fst.h"
14
+ #include "compact.h"
15
+
16
+
17
+ class MakeCompactTransducer : CompactTransducer {
18
+
19
+ private:
20
+ void count_arcs(Node *node, NodeNumbering &index, long vmark);
21
+ void store_arcs(Node *node, NodeNumbering &index, long vmark);
22
+ void store_finalp( FILE *file );
23
+ void store_first_arcs( FILE *file );
24
+ void store_target_nodes( FILE *file );
25
+ void store_labels( FILE *file );
26
+
27
+ public:
28
+ MakeCompactTransducer( Transducer &a, Level sort=upper );
29
+
30
+ void sort( Level );
31
+ void store( FILE *file );
32
+ };
33
+
34
+ #endif
@@ -0,0 +1,74 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE mem.h */
4
+ /* MODULE mem */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE memory management functions */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #ifndef _MEM_H_
13
+ #define _MEM_H_
14
+
15
+ #include <stdlib.h>
16
+
17
#define MEMBUFFER_SIZE 100000


/*****************  class Mem  *************************************/

// Simple block allocator: memory is carved sequentially out of fixed-size
// buffers of MEMBUFFER_SIZE bytes and is only released all at once, by
// clear() or by the destructor.
//
// Errors are reported by throwing a C string (const char*).
//
// NOTE(review): the class owns raw buffers but declares neither a copy
// constructor nor an assignment operator, so copying a Mem object would
// free the same buffers twice.  Left unchanged so that existing users keep
// compiling; do not copy Mem objects.
class Mem {

 private:

  // One buffer; the buffers form a singly linked list headed by first_buffer.
  struct MemBuffer {
    char buffer[MEMBUFFER_SIZE];
    struct MemBuffer *next;
  };

  // Allocation granularity.  Request sizes are rounded up to a multiple of
  // sizeof(AlignUnit) so that every returned block is suitably aligned for
  // pointers, longs and doubles (the former 4-byte rounding misaligned
  // pointer-bearing objects on 64-bit platforms).
  union AlignUnit {
    void *p;
    long l;
    double d;
  };

  MemBuffer *first_buffer;   // buffer currently being filled (head of list)
  long pos;                  // offset of the first free byte in first_buffer

  // Puts a fresh, empty buffer at the head of the list.
  // Throws if the buffer cannot be obtained.
  void add_buffer() {
    MemBuffer *mb=(MemBuffer*)malloc(sizeof(MemBuffer));
    if (mb == NULL)
      throw "Out of memory: allocation of a new memory buffer failed!";
    mb->next = first_buffer;
    first_buffer = mb;
    pos = 0;
  }

 public:
  Mem() { first_buffer = NULL; pos = 0; add_buffer(); }
  ~Mem() { clear(); }

  // Releases every buffer.  All blocks handed out so far become invalid;
  // the object stays usable and gets a new buffer on the next alloc().
  void clear() {
    while (first_buffer) {
      MemBuffer *next = first_buffer->next;
      free(first_buffer);
      first_buffer = next;
    }
    pos = 0;
  }

  // Returns a block of at least n bytes that stays valid until clear() or
  // destruction.  Throws if n exceeds MEMBUFFER_SIZE or if no new buffer
  // can be obtained.
  void *alloc( size_t n ) {
    const size_t unit = sizeof(AlignUnit);

    // Reject oversized requests before rounding, so that the rounding
    // below cannot wrap around, and before a new buffer is wasted on a
    // request that can never succeed.
    if (n > MEMBUFFER_SIZE)
      throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

    /* do memory alignment to multiples of the allocation unit */
    if (n % unit)
      n += unit - (n % unit);
    if (n > MEMBUFFER_SIZE)
      throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

    // start a new buffer if the current one cannot hold the block
    if (first_buffer == NULL || n > MEMBUFFER_SIZE - (size_t)pos)
      add_buffer();

    void *result = (void*)(first_buffer->buffer + pos);
    pos += (long)n;
    return result;
  }

  //class MemError {};

};
73
+
74
+ #endif