ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -1,93 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE interface.h */
4
- /* MODULE interface */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /*******************************************************************/
9
-
10
- #ifndef _INTERFACE_H_
11
- #define _INTERFACE_H_
12
-
13
- #include "utf8.h"
14
- #include "fst.h"
15
-
16
- typedef enum {twol_left,twol_right,twol_both} Twol_Type;
17
-
18
- typedef enum {repl_left,repl_right,repl_up,repl_down} Repl_Type;
19
-
20
- typedef struct range_t {
21
- Character character;
22
- struct range_t *next;
23
- } Range;
24
-
25
- typedef struct ranges_t {
26
- Range *range;
27
- struct ranges_t *next;
28
- } Ranges;
29
-
30
-
31
- typedef struct contexts_t {
32
- Transducer *left, *right;
33
- struct contexts_t *next;
34
- } Contexts;
35
-
36
-
37
- extern bool Verbose;
38
- extern bool UTF8;
39
- extern char *FileName;
40
- extern Alphabet TheAlphabet;
41
-
42
- void error2( char *message, char *input );
43
- Transducer *new_transducer( Range*, Range* );
44
- Transducer *read_words( char *filename );
45
- Transducer *read_transducer( char *filename );
46
- Transducer *var_value( char *name );
47
- Transducer *rvar_value( char *name );
48
- Range *svar_value( char *name );
49
- Range *complement_range( Range* );
50
- Range *rsvar_value( char *name );
51
- Character character_code( unsigned int uc );
52
- Character symbol_code( char *s );
53
-
54
- Range *add_value( Character, Range*);
55
- Range *add_var_values( char *name, Range*);
56
- Range *add_values( unsigned int, unsigned int, Range*);
57
- Range *append_values( Range *r2, Range *r );
58
- void add_alphabet( Transducer* );
59
-
60
- // These functions delete their argument automata
61
-
62
- void def_alphabet( Transducer *a );
63
- bool def_var( char *name, Transducer *a );
64
- bool def_rvar( char *name, Transducer *a );
65
- bool def_svar( char *name, Range *r );
66
- Transducer *explode( Transducer *a );
67
- Transducer *catenate( Transducer *a1, Transducer *a2 );
68
- Transducer *disjunction( Transducer *a1, Transducer *a2 );
69
- Transducer *conjunction( Transducer *a1, Transducer *a2 );
70
- Transducer *subtraction( Transducer *a1, Transducer *a2 );
71
- Transducer *composition( Transducer *a1, Transducer *a2 );
72
- Transducer *restriction( Transducer *a, Twol_Type type, Contexts *c, int );
73
- Transducer *replace( Transducer *a, Repl_Type type, bool optional );
74
- Transducer *replace_in_context( Transducer *a, Repl_Type type, Contexts *c, bool optional );
75
- Transducer *negation( Transducer *a );
76
- Transducer *upper_level( Transducer *a );
77
- Transducer *lower_level( Transducer *a );
78
- Transducer *minimise( Transducer *a );
79
- Transducer *switch_levels( Transducer *a );
80
- Transducer *repetition( Transducer *a );
81
- Transducer *repetition2( Transducer *a );
82
- Transducer *optional( Transducer *a );
83
- Transducer *make_rule( Transducer *lc, Range *r1, Twol_Type type,
84
- Range *r2, Transducer *rc );
85
- Transducer *freely_insert( Transducer *a, Character lc, Character uc );
86
- Transducer *make_mapping( Ranges*, Ranges* );
87
- Ranges *add_range( Range*, Ranges* );
88
- Contexts *make_context( Transducer *l, Transducer *r );
89
- Contexts *add_context( Contexts *nc, Contexts *c );
90
- Transducer *result( Transducer*, bool );
91
- void write_to_file( Transducer*, char *filename);
92
-
93
- #endif
@@ -1,327 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE make-compact.C */
4
- /* MODULE make-compact */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE Code needed for generating compact automata */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #include <math.h>
13
-
14
- #include "make-compact.h"
15
-
16
- using std::equal_range;
17
- using std::sort;
18
- using std::cerr;
19
-
20
- class ARC {
21
- public:
22
- int cv;
23
- Label label;
24
- unsigned int target_node;
25
-
26
- bool operator< ( const ARC a ) const {
27
- return cv < a.cv;
28
- };
29
- };
30
-
31
- typedef hash_map<Label, size_t, Label::label_hash, Label::label_eq> LabelNumber;
32
-
33
-
34
- /*******************************************************************/
35
- /* */
36
- /* MakeCompactTransducer::sort */
37
- /* */
38
- /*******************************************************************/
39
-
40
- void MakeCompactTransducer::sort( Level level )
41
-
42
- {
43
- for( unsigned int n=0; n<number_of_nodes; n++) {
44
- unsigned int from=first_arc[n];
45
- unsigned int to=first_arc[n+1];
46
- int l=to-from;
47
-
48
- // copy the arcs to a temporary table
49
- ARC *arc=new ARC[l];
50
- for( unsigned int i=from; i<to; i++) {
51
- arc[i-from].cv = (int)label[i].get_char(level);
52
- // make sure that epsilon arcs are stored at the beginning
53
- // even if epsilon is not 0
54
- if (arc[i-from].cv == (int)Label::epsilon)
55
- arc[i-from].cv = -1;
56
- arc[i-from].label = label[i];
57
- arc[i-from].target_node = target_node[i];
58
- }
59
-
60
- // sort the table
61
- ::sort( arc, arc+l );
62
-
63
- // copy the arcs back to the original table
64
- for( unsigned int i=from; i<to; i++) {
65
- label[i] = arc[i-from].label;
66
- target_node[i] = arc[i-from].target_node;
67
- }
68
-
69
- delete[] arc;
70
- }
71
- }
72
-
73
-
74
- /*******************************************************************/
75
- /* */
76
- /* MakeCompactTransducer::count_arcs */
77
- /* */
78
- /*******************************************************************/
79
-
80
- void MakeCompactTransducer::count_arcs( Node *node, NodeNumbering &index,
81
- long vmark )
82
- {
83
- if (!node->was_visited( vmark )) {
84
- unsigned int n = index[node];
85
- finalp[n] = node->is_final();
86
- first_arc[n] = 0;
87
- Arcs *arcs=node->arcs();
88
- for( ArcsIter p(arcs); p; p++ ) {
89
- Arc *arc=p;
90
- first_arc[n]++;
91
- count_arcs(arc->target_node(), index, vmark);
92
- }
93
- }
94
- }
95
-
96
-
97
- /*******************************************************************/
98
- /* */
99
- /* MakeCompactTransducer::store_arcs */
100
- /* */
101
- /*******************************************************************/
102
-
103
- void MakeCompactTransducer::store_arcs( Node *node, NodeNumbering &index,
104
- long vmark )
105
- {
106
- if (!node->was_visited( vmark )) {
107
- unsigned int n=first_arc[index[node]];
108
- Arcs *arcs=node->arcs();
109
- for( ArcsIter p(arcs); p; p++ ) {
110
- Arc *arc=p;
111
- label[n] = arc->label();
112
- target_node[n++] = index[arc->target_node()];
113
- store_arcs(arc->target_node(), index, vmark);
114
- }
115
- }
116
- }
117
-
118
-
119
- /*******************************************************************/
120
- /* */
121
- /* MakeCompactTransducer::MakeCompactTransducer */
122
- /* */
123
- /*******************************************************************/
124
-
125
- MakeCompactTransducer::MakeCompactTransducer( Transducer &a, Level l )
126
-
127
- {
128
- if (a.is_infinitely_ambiguous()) {
129
- cerr << "Error: resulting transducer contains an infinite loop!\n";
130
- exit(1);
131
- }
132
-
133
- NodeNumbering index(a);
134
-
135
- alphabet.copy(a.alphabet);
136
-
137
- // memory allocation
138
- number_of_nodes = index.number_of_nodes();
139
- finalp = new char[number_of_nodes];
140
- first_arc = new unsigned int[number_of_nodes+1];
141
-
142
- // count the number of outgoing arcs for each node
143
- // and store them in first_arc[]
144
- a.incr_vmark();
145
- count_arcs( a.root_node(), index, a.vmark );
146
- for( int n=number_of_nodes; n>0; n-- )
147
- first_arc[n] = first_arc[n-1];
148
- first_arc[0] = 0;
149
- for( unsigned int n=0; n<number_of_nodes; n++ )
150
- first_arc[n+1] += first_arc[n];
151
- number_of_arcs = first_arc[number_of_nodes];
152
-
153
- // memory allocation
154
- label = new Label[number_of_arcs];
155
- target_node = new unsigned int[number_of_arcs];
156
-
157
- // store the arcs
158
- a.incr_vmark();
159
- store_arcs( a.root_node(), index, a.vmark );
160
-
161
- // sort the arcs
162
- sort( l );
163
- }
164
-
165
-
166
- /*******************************************************************/
167
- /* */
168
- /* MakeCompactTransducer::store_finalp */
169
- /* */
170
- /*******************************************************************/
171
-
172
- void MakeCompactTransducer::store_finalp( FILE *file )
173
-
174
- {
175
- int k=0;
176
- unsigned char n=0;
177
-
178
- for( size_t i=0; i<number_of_nodes; i++ ) {
179
- n = n << 1;
180
- if (finalp[i])
181
- n |= 1;
182
- if (++k == 8) {
183
- fputc(n, file);
184
- n = 0;
185
- k = 0;
186
- }
187
- }
188
- if (k > 0) {
189
- n <<= 8-k;
190
- fputc(n, file);
191
- }
192
- }
193
-
194
-
195
- /*******************************************************************/
196
- /* */
197
- /* MakeCompactTransducer::store_first_arcs */
198
- /* */
199
- /* The data is encoded with the minimal number of bits needed. */
200
- /* */
201
- /*******************************************************************/
202
-
203
- void MakeCompactTransducer::store_first_arcs( FILE *file )
204
-
205
- {
206
- int k=0;
207
- unsigned int n=0;
208
- // compute number of bits required for storing each item
209
- size_t bits=(size_t)ceil(log(number_of_arcs+1)/log(2));
210
-
211
- for( size_t i=0; i<=number_of_nodes; i++ ) {
212
- unsigned int m=first_arc[i];
213
- m <<= (sizeof(n)*8) - bits;
214
- m >>= k;
215
- n = n | m;
216
- k += bits;
217
- if (k >= (int)sizeof(n)*8) {
218
- fwrite(&n, sizeof(n), 1, file);
219
- k -= sizeof(n) * 8;
220
- n = first_arc[i];
221
- if (k == 0)
222
- n = 0;
223
- else
224
- n = first_arc[i] << (sizeof(n) * 8 - k);
225
- }
226
- }
227
- if (k > 0)
228
- fwrite(&n, sizeof(n), 1, file);
229
- }
230
-
231
-
232
- /*******************************************************************/
233
- /* */
234
- /* MakeCompactTransducer::store_target_nodes */
235
- /* */
236
- /*******************************************************************/
237
-
238
- void MakeCompactTransducer::store_target_nodes( FILE *file )
239
-
240
- {
241
- int k=0;
242
- unsigned int n=0;
243
- size_t bits=(size_t)ceil(log(number_of_nodes)/log(2));
244
-
245
- for( size_t i=0; i<number_of_arcs; i++ ) {
246
- unsigned int m=target_node[i];
247
- m <<= (sizeof(n)*8) - bits;
248
- m >>= k;
249
- n = n | m;
250
- k += bits;
251
- if (k >= (int)sizeof(n)*8) {
252
- fwrite(&n, sizeof(n), 1, file);
253
- k -= sizeof(n)*8;
254
- if (k == 0)
255
- n = 0;
256
- else
257
- n = target_node[i] << (sizeof(n) * 8 - k);
258
- }
259
- }
260
- if (k > 0)
261
- fwrite(&n, sizeof(n), 1, file);
262
- }
263
-
264
-
265
- /*******************************************************************/
266
- /* */
267
- /* MakeCompactTransducer::store_labels */
268
- /* */
269
- /*******************************************************************/
270
-
271
- void MakeCompactTransducer::store_labels( FILE *file )
272
-
273
- {
274
- size_t N=0;
275
- LabelNumber LNum;
276
- for( Alphabet::const_iterator it=alphabet.begin();
277
- it != alphabet.end(); it++ )
278
- {
279
- Label l=*it;
280
- LNum[l] = N++;
281
- }
282
-
283
- int k=0;
284
- unsigned int n=0;
285
- size_t bits=(size_t)ceil(log(alphabet.size())/log(2));
286
-
287
- for( size_t i=0; i<number_of_arcs; i++ ) {
288
- unsigned int l = LNum[label[i]];
289
- unsigned int m=l;
290
- m = m << (sizeof(n)*8) - bits;
291
- m = m >> k;
292
- n = n | m;
293
- k += bits;
294
- if (k >= (int)sizeof(n)*8) {
295
- fwrite(&n, sizeof(n), 1, file);
296
- k -= sizeof(n)*8;
297
- if (k == 0)
298
- n = 0;
299
- else
300
- n = l << (sizeof(n) * 8 - k);
301
- }
302
- }
303
- if (k > 0)
304
- fwrite(&n, sizeof(n), 1, file);
305
- }
306
-
307
-
308
- /*******************************************************************/
309
- /* */
310
- /* MakeCompactTransducer::store */
311
- /* */
312
- /*******************************************************************/
313
-
314
- void MakeCompactTransducer::store( FILE *file )
315
-
316
- {
317
- fputc('c',file);
318
- alphabet.store(file);
319
- fwrite(&number_of_nodes, sizeof(number_of_nodes), 1, file);
320
- fwrite(&number_of_arcs, sizeof(number_of_arcs), 1, file);
321
- store_finalp(file);
322
- store_first_arcs(file);
323
- store_labels(file);
324
- store_target_nodes(file);
325
- if (ferror(file))
326
- throw "Error encountered while writing transducer to file\n";
327
- }
@@ -1,74 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE mem.h */
4
- /* MODULE mem */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE memory management functions */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _MEM_H_
13
- #define _MEM_H_
14
-
15
- #include <stdlib.h>
16
-
17
- #define MEMBUFFER_SIZE 100000
18
-
19
-
20
- /***************** class Mem *************************************/
21
-
22
- class Mem {
23
-
24
- private:
25
-
26
- struct MemBuffer {
27
- char buffer[MEMBUFFER_SIZE];
28
- struct MemBuffer *next;
29
- };
30
-
31
- MemBuffer *first_buffer;
32
- long pos;
33
- void add_buffer() {
34
- MemBuffer *mb=(MemBuffer*)malloc(sizeof(MemBuffer));
35
- mb->next = first_buffer;
36
- first_buffer = mb;
37
- pos = 0;
38
- }
39
-
40
- public:
41
- Mem() { first_buffer = NULL; add_buffer(); }
42
- ~Mem() { clear(); }
43
-
44
- void clear() {
45
- while (first_buffer) {
46
- MemBuffer *next = first_buffer->next;
47
- free(first_buffer);
48
- first_buffer = next;
49
- }
50
- pos = 0;
51
- }
52
-
53
- void *alloc( size_t n ) {
54
- void *result;
55
-
56
- /* do memory alignment to multiples of 4 */
57
- if (n % 4)
58
- n += 4 - (n % 4);
59
-
60
- if (first_buffer == NULL || pos+n > MEMBUFFER_SIZE)
61
- add_buffer();
62
- if (pos+n > MEMBUFFER_SIZE)
63
- throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";
64
-
65
- result = (void*)(first_buffer->buffer + pos);
66
- pos += n;
67
- return result;
68
- }
69
-
70
- //class MemError {};
71
-
72
- };
73
-
74
- #endif