ruby-sfst 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -1,93 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE interface.h */
4
- /* MODULE interface */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /*******************************************************************/
9
-
10
- #ifndef _INTERFACE_H_
11
- #define _INTERFACE_H_
12
-
13
- #include "utf8.h"
14
- #include "fst.h"
15
-
16
- typedef enum {twol_left,twol_right,twol_both} Twol_Type;
17
-
18
- typedef enum {repl_left,repl_right,repl_up,repl_down} Repl_Type;
19
-
20
- typedef struct range_t {
21
- Character character;
22
- struct range_t *next;
23
- } Range;
24
-
25
- typedef struct ranges_t {
26
- Range *range;
27
- struct ranges_t *next;
28
- } Ranges;
29
-
30
-
31
- typedef struct contexts_t {
32
- Transducer *left, *right;
33
- struct contexts_t *next;
34
- } Contexts;
35
-
36
-
37
- extern bool Verbose;
38
- extern bool UTF8;
39
- extern char *FileName;
40
- extern Alphabet TheAlphabet;
41
-
42
- void error2( char *message, char *input );
43
- Transducer *new_transducer( Range*, Range* );
44
- Transducer *read_words( char *filename );
45
- Transducer *read_transducer( char *filename );
46
- Transducer *var_value( char *name );
47
- Transducer *rvar_value( char *name );
48
- Range *svar_value( char *name );
49
- Range *complement_range( Range* );
50
- Range *rsvar_value( char *name );
51
- Character character_code( unsigned int uc );
52
- Character symbol_code( char *s );
53
-
54
- Range *add_value( Character, Range*);
55
- Range *add_var_values( char *name, Range*);
56
- Range *add_values( unsigned int, unsigned int, Range*);
57
- Range *append_values( Range *r2, Range *r );
58
- void add_alphabet( Transducer* );
59
-
60
- // These functions delete their argument automata
61
-
62
- void def_alphabet( Transducer *a );
63
- bool def_var( char *name, Transducer *a );
64
- bool def_rvar( char *name, Transducer *a );
65
- bool def_svar( char *name, Range *r );
66
- Transducer *explode( Transducer *a );
67
- Transducer *catenate( Transducer *a1, Transducer *a2 );
68
- Transducer *disjunction( Transducer *a1, Transducer *a2 );
69
- Transducer *conjunction( Transducer *a1, Transducer *a2 );
70
- Transducer *subtraction( Transducer *a1, Transducer *a2 );
71
- Transducer *composition( Transducer *a1, Transducer *a2 );
72
- Transducer *restriction( Transducer *a, Twol_Type type, Contexts *c, int );
73
- Transducer *replace( Transducer *a, Repl_Type type, bool optional );
74
- Transducer *replace_in_context( Transducer *a, Repl_Type type, Contexts *c, bool optional );
75
- Transducer *negation( Transducer *a );
76
- Transducer *upper_level( Transducer *a );
77
- Transducer *lower_level( Transducer *a );
78
- Transducer *minimise( Transducer *a );
79
- Transducer *switch_levels( Transducer *a );
80
- Transducer *repetition( Transducer *a );
81
- Transducer *repetition2( Transducer *a );
82
- Transducer *optional( Transducer *a );
83
- Transducer *make_rule( Transducer *lc, Range *r1, Twol_Type type,
84
- Range *r2, Transducer *rc );
85
- Transducer *freely_insert( Transducer *a, Character lc, Character uc );
86
- Transducer *make_mapping( Ranges*, Ranges* );
87
- Ranges *add_range( Range*, Ranges* );
88
- Contexts *make_context( Transducer *l, Transducer *r );
89
- Contexts *add_context( Contexts *nc, Contexts *c );
90
- Transducer *result( Transducer*, bool );
91
- void write_to_file( Transducer*, char *filename);
92
-
93
- #endif
@@ -1,327 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE make-compact.C */
4
- /* MODULE make-compact */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE Code needed for generating compact automata */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #include <math.h>
13
-
14
- #include "make-compact.h"
15
-
16
- using std::equal_range;
17
- using std::sort;
18
- using std::cerr;
19
-
20
- class ARC {
21
- public:
22
- int cv;
23
- Label label;
24
- unsigned int target_node;
25
-
26
- bool operator< ( const ARC a ) const {
27
- return cv < a.cv;
28
- };
29
- };
30
-
31
- typedef hash_map<Label, size_t, Label::label_hash, Label::label_eq> LabelNumber;
32
-
33
-
34
- /*******************************************************************/
35
- /* */
36
- /* MakeCompactTransducer::sort */
37
- /* */
38
- /*******************************************************************/
39
-
40
- void MakeCompactTransducer::sort( Level level )
41
-
42
- {
43
- for( unsigned int n=0; n<number_of_nodes; n++) {
44
- unsigned int from=first_arc[n];
45
- unsigned int to=first_arc[n+1];
46
- int l=to-from;
47
-
48
- // copy the arcs to a temporary table
49
- ARC *arc=new ARC[l];
50
- for( unsigned int i=from; i<to; i++) {
51
- arc[i-from].cv = (int)label[i].get_char(level);
52
- // make sure that epsilon arcs are stored at the beginning
53
- // even if epsilon is not 0
54
- if (arc[i-from].cv == (int)Label::epsilon)
55
- arc[i-from].cv = -1;
56
- arc[i-from].label = label[i];
57
- arc[i-from].target_node = target_node[i];
58
- }
59
-
60
- // sort the table
61
- ::sort( arc, arc+l );
62
-
63
- // copy the arcs back to the original table
64
- for( unsigned int i=from; i<to; i++) {
65
- label[i] = arc[i-from].label;
66
- target_node[i] = arc[i-from].target_node;
67
- }
68
-
69
- delete[] arc;
70
- }
71
- }
72
-
73
-
74
- /*******************************************************************/
75
- /* */
76
- /* MakeCompactTransducer::count_arcs */
77
- /* */
78
- /*******************************************************************/
79
-
80
- void MakeCompactTransducer::count_arcs( Node *node, NodeNumbering &index,
81
- long vmark )
82
- {
83
- if (!node->was_visited( vmark )) {
84
- unsigned int n = index[node];
85
- finalp[n] = node->is_final();
86
- first_arc[n] = 0;
87
- Arcs *arcs=node->arcs();
88
- for( ArcsIter p(arcs); p; p++ ) {
89
- Arc *arc=p;
90
- first_arc[n]++;
91
- count_arcs(arc->target_node(), index, vmark);
92
- }
93
- }
94
- }
95
-
96
-
97
- /*******************************************************************/
98
- /* */
99
- /* MakeCompactTransducer::store_arcs */
100
- /* */
101
- /*******************************************************************/
102
-
103
- void MakeCompactTransducer::store_arcs( Node *node, NodeNumbering &index,
104
- long vmark )
105
- {
106
- if (!node->was_visited( vmark )) {
107
- unsigned int n=first_arc[index[node]];
108
- Arcs *arcs=node->arcs();
109
- for( ArcsIter p(arcs); p; p++ ) {
110
- Arc *arc=p;
111
- label[n] = arc->label();
112
- target_node[n++] = index[arc->target_node()];
113
- store_arcs(arc->target_node(), index, vmark);
114
- }
115
- }
116
- }
117
-
118
-
119
- /*******************************************************************/
120
- /* */
121
- /* MakeCompactTransducer::MakeCompactTransducer */
122
- /* */
123
- /*******************************************************************/
124
-
125
- MakeCompactTransducer::MakeCompactTransducer( Transducer &a, Level l )
126
-
127
- {
128
- if (a.is_infinitely_ambiguous()) {
129
- cerr << "Error: resulting transducer contains an infinite loop!\n";
130
- exit(1);
131
- }
132
-
133
- NodeNumbering index(a);
134
-
135
- alphabet.copy(a.alphabet);
136
-
137
- // memory allocation
138
- number_of_nodes = index.number_of_nodes();
139
- finalp = new char[number_of_nodes];
140
- first_arc = new unsigned int[number_of_nodes+1];
141
-
142
- // count the number of outgoing arcs for each node
143
- // and store them in first_arc[]
144
- a.incr_vmark();
145
- count_arcs( a.root_node(), index, a.vmark );
146
- for( int n=number_of_nodes; n>0; n-- )
147
- first_arc[n] = first_arc[n-1];
148
- first_arc[0] = 0;
149
- for( unsigned int n=0; n<number_of_nodes; n++ )
150
- first_arc[n+1] += first_arc[n];
151
- number_of_arcs = first_arc[number_of_nodes];
152
-
153
- // memory allocation
154
- label = new Label[number_of_arcs];
155
- target_node = new unsigned int[number_of_arcs];
156
-
157
- // store the arcs
158
- a.incr_vmark();
159
- store_arcs( a.root_node(), index, a.vmark );
160
-
161
- // sort the arcs
162
- sort( l );
163
- }
164
-
165
-
166
- /*******************************************************************/
167
- /* */
168
- /* MakeCompactTransducer::store_finalp */
169
- /* */
170
- /*******************************************************************/
171
-
172
- void MakeCompactTransducer::store_finalp( FILE *file )
173
-
174
- {
175
- int k=0;
176
- unsigned char n=0;
177
-
178
- for( size_t i=0; i<number_of_nodes; i++ ) {
179
- n = n << 1;
180
- if (finalp[i])
181
- n |= 1;
182
- if (++k == 8) {
183
- fputc(n, file);
184
- n = 0;
185
- k = 0;
186
- }
187
- }
188
- if (k > 0) {
189
- n <<= 8-k;
190
- fputc(n, file);
191
- }
192
- }
193
-
194
-
195
- /*******************************************************************/
196
- /* */
197
- /* MakeCompactTransducer::store_first_arcs */
198
- /* */
199
- /* The data is encoded with the minimal number of bits needed. */
200
- /* */
201
- /*******************************************************************/
202
-
203
- void MakeCompactTransducer::store_first_arcs( FILE *file )
204
-
205
- {
206
- int k=0;
207
- unsigned int n=0;
208
- // compute number of bits required for storing each item
209
- size_t bits=(size_t)ceil(log(number_of_arcs+1)/log(2));
210
-
211
- for( size_t i=0; i<=number_of_nodes; i++ ) {
212
- unsigned int m=first_arc[i];
213
- m <<= (sizeof(n)*8) - bits;
214
- m >>= k;
215
- n = n | m;
216
- k += bits;
217
- if (k >= (int)sizeof(n)*8) {
218
- fwrite(&n, sizeof(n), 1, file);
219
- k -= sizeof(n) * 8;
220
- n = first_arc[i];
221
- if (k == 0)
222
- n = 0;
223
- else
224
- n = first_arc[i] << (sizeof(n) * 8 - k);
225
- }
226
- }
227
- if (k > 0)
228
- fwrite(&n, sizeof(n), 1, file);
229
- }
230
-
231
-
232
- /*******************************************************************/
233
- /* */
234
- /* MakeCompactTransducer::store_target_nodes */
235
- /* */
236
- /*******************************************************************/
237
-
238
- void MakeCompactTransducer::store_target_nodes( FILE *file )
239
-
240
- {
241
- int k=0;
242
- unsigned int n=0;
243
- size_t bits=(size_t)ceil(log(number_of_nodes)/log(2));
244
-
245
- for( size_t i=0; i<number_of_arcs; i++ ) {
246
- unsigned int m=target_node[i];
247
- m <<= (sizeof(n)*8) - bits;
248
- m >>= k;
249
- n = n | m;
250
- k += bits;
251
- if (k >= (int)sizeof(n)*8) {
252
- fwrite(&n, sizeof(n), 1, file);
253
- k -= sizeof(n)*8;
254
- if (k == 0)
255
- n = 0;
256
- else
257
- n = target_node[i] << (sizeof(n) * 8 - k);
258
- }
259
- }
260
- if (k > 0)
261
- fwrite(&n, sizeof(n), 1, file);
262
- }
263
-
264
-
265
- /*******************************************************************/
266
- /* */
267
- /* MakeCompactTransducer::store_labels */
268
- /* */
269
- /*******************************************************************/
270
-
271
- void MakeCompactTransducer::store_labels( FILE *file )
272
-
273
- {
274
- size_t N=0;
275
- LabelNumber LNum;
276
- for( Alphabet::const_iterator it=alphabet.begin();
277
- it != alphabet.end(); it++ )
278
- {
279
- Label l=*it;
280
- LNum[l] = N++;
281
- }
282
-
283
- int k=0;
284
- unsigned int n=0;
285
- size_t bits=(size_t)ceil(log(alphabet.size())/log(2));
286
-
287
- for( size_t i=0; i<number_of_arcs; i++ ) {
288
- unsigned int l = LNum[label[i]];
289
- unsigned int m=l;
290
- m = m << (sizeof(n)*8) - bits;
291
- m = m >> k;
292
- n = n | m;
293
- k += bits;
294
- if (k >= (int)sizeof(n)*8) {
295
- fwrite(&n, sizeof(n), 1, file);
296
- k -= sizeof(n)*8;
297
- if (k == 0)
298
- n = 0;
299
- else
300
- n = l << (sizeof(n) * 8 - k);
301
- }
302
- }
303
- if (k > 0)
304
- fwrite(&n, sizeof(n), 1, file);
305
- }
306
-
307
-
308
- /*******************************************************************/
309
- /* */
310
- /* MakeCompactTransducer::store */
311
- /* */
312
- /*******************************************************************/
313
-
314
- void MakeCompactTransducer::store( FILE *file )
315
-
316
- {
317
- fputc('c',file);
318
- alphabet.store(file);
319
- fwrite(&number_of_nodes, sizeof(number_of_nodes), 1, file);
320
- fwrite(&number_of_arcs, sizeof(number_of_arcs), 1, file);
321
- store_finalp(file);
322
- store_first_arcs(file);
323
- store_labels(file);
324
- store_target_nodes(file);
325
- if (ferror(file))
326
- throw "Error encountered while writing transducer to file\n";
327
- }
@@ -1,74 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE mem.h */
4
- /* MODULE mem */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE memory management functions */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _MEM_H_
13
- #define _MEM_H_
14
-
15
- #include <stdlib.h>
16
-
17
#define MEMBUFFER_SIZE 100000


/*****************  class Mem  *************************************/

/* Simple block allocator. Memory is handed out sequentially from    */
/* malloc'ed buffers of MEMBUFFER_SIZE bytes that are chained in a   */
/* list. Individual blocks cannot be released; clear() and the       */
/* destructor free all buffers at once. Errors are reported by       */
/* throwing a C string.                                              */
/* NOTE(review): copying a Mem object would make two objects free    */
/* the same buffers -- confirm that no caller copies it.             */
class Mem {

 private:

  struct MemBuffer {
    char buffer[MEMBUFFER_SIZE];
    struct MemBuffer *next;
  };

  MemBuffer *first_buffer;  // buffer currently allocated from (list head)
  long pos;                 // offset of the first unused byte in first_buffer

  // Puts a fresh buffer at the head of the list and restarts at offset 0.
  // Throws if the system is out of memory.
  void add_buffer() {
    MemBuffer *mb=(MemBuffer*)malloc(sizeof(MemBuffer));
    if (mb == NULL)
      throw "Allocation of memory failed in Mem::add_buffer!";
    mb->next = first_buffer;
    first_buffer = mb;
    pos = 0;
  }

 public:
  Mem() { first_buffer = NULL; add_buffer(); }
  ~Mem() { clear(); }

  // Frees all buffers; every pointer returned by alloc() becomes invalid.
  // The object stays usable: the next alloc() obtains a new buffer.
  void clear() {
    while (first_buffer) {
      MemBuffer *next = first_buffer->next;
      free(first_buffer);
      first_buffer = next;
    }
    pos = 0;
  }

  // Returns a block of at least n bytes whose offset inside its buffer is
  // a multiple of 4. Throws if n exceeds what a single buffer can hold.
  // NOTE(review): 4-byte alignment may be too weak for objects holding
  // pointers on 64-bit targets -- confirm what callers store here.
  void *alloc( size_t n ) {
    // Reject oversized requests before rounding: for n close to the
    // maximum of size_t the rounding below would wrap around to a small
    // value and the request would wrongly succeed.
    if (n > MEMBUFFER_SIZE)
      throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

    /* do memory alignment to multiples of 4 */
    if (n % 4)
      n += 4 - (n % 4);

    // the rounded size must still fit (checked before a buffer is added,
    // so that a failing request does not waste a buffer)
    if (n > MEMBUFFER_SIZE)
      throw "Allocation of memory block larger than MEMBUFFER_SIZE attempted!";

    if (first_buffer == NULL || (size_t)pos+n > MEMBUFFER_SIZE)
      add_buffer();

    void *result = (void*)(first_buffer->buffer + pos);
    pos += n;
    return result;
  }

  //class MemError {};

};
73
-
74
- #endif