ruby-sfst 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/COPYING +280 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +54 -0
- data/README.md +1 -1
- data/Rakefile +9 -18
- data/bin/console +7 -0
- data/bin/setup +6 -0
- data/ext/sfst/alphabet.cc +879 -0
- data/ext/sfst/alphabet.h +302 -0
- data/ext/sfst/basic.cc +85 -0
- data/ext/{sfst_machine → sfst}/basic.h +7 -4
- data/ext/sfst/compact.cc +629 -0
- data/ext/sfst/compact.h +100 -0
- data/ext/sfst/determinise.cc +279 -0
- data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
- data/ext/sfst/fst.cc +1150 -0
- data/ext/sfst/fst.h +374 -0
- data/ext/sfst/hopcroft.cc +681 -0
- data/ext/sfst/interface.cc +1921 -0
- data/ext/sfst/interface.h +171 -0
- data/ext/sfst/make-compact.cc +323 -0
- data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
- data/ext/sfst/mem.h +80 -0
- data/ext/sfst/operators.cc +1273 -0
- data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
- data/ext/sfst/sgi.h +72 -0
- data/ext/sfst/utf8.cc +149 -0
- data/ext/{sfst_machine → sfst}/utf8.h +7 -4
- data/lib/sfst.rb +2 -1
- data/lib/sfst/version.rb +1 -1
- data/ruby-sfst.gemspec +23 -23
- metadata +107 -35
- data/ext/sfst_machine/alphabet.cc +0 -812
- data/ext/sfst_machine/alphabet.h +0 -273
- data/ext/sfst_machine/basic.cc +0 -84
- data/ext/sfst_machine/compact.cc +0 -616
- data/ext/sfst_machine/compact.h +0 -98
- data/ext/sfst_machine/determinise.cc +0 -303
- data/ext/sfst_machine/fst.cc +0 -1000
- data/ext/sfst_machine/fst.h +0 -369
- data/ext/sfst_machine/interface.cc +0 -1842
- data/ext/sfst_machine/interface.h +0 -93
- data/ext/sfst_machine/make-compact.cc +0 -327
- data/ext/sfst_machine/mem.h +0 -74
- data/ext/sfst_machine/operators.cc +0 -1131
- data/ext/sfst_machine/sgi.h +0 -44
- data/ext/sfst_machine/utf8.cc +0 -146
- data/test/test_sfst.fst +0 -3
- data/test/test_sfst.rb +0 -114
data/ext/sfst/fst.h
ADDED
@@ -0,0 +1,374 @@
|
|
1
|
+
/*******************************************************************/
|
2
|
+
/* */
|
3
|
+
/* FILE fst.h */
|
4
|
+
/* MODULE fst */
|
5
|
+
/* PROGRAM SFST */
|
6
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
7
|
+
/* */
|
8
|
+
/* PURPOSE finite state tools */
|
9
|
+
/* */
|
10
|
+
/*******************************************************************/
|
11
|
+
|
12
|
+
#ifndef _FST_H_
|
13
|
+
#define _FST_H_
|
14
|
+
|
15
|
+
#include "alphabet.h"
|
16
|
+
|
17
|
+
typedef enum { Joint, UpperOnly, LowerOnly, Both } OutputType;
|
18
|
+
|
19
|
+
|
20
|
+
/*******************************************************************/
|
21
|
+
/* include commands */
|
22
|
+
/*******************************************************************/
|
23
|
+
|
24
|
+
#include <string>
|
25
|
+
#include <vector>
|
26
|
+
#include <map>
|
27
|
+
#include <set>
|
28
|
+
|
29
|
+
using std::map;
|
30
|
+
using std::set;
|
31
|
+
using std::vector;
|
32
|
+
using std::istream;
|
33
|
+
using std::ostream;
|
34
|
+
|
35
|
+
#include "mem.h"
|
36
|
+
|
37
|
+
namespace SFST {
|
38
|
+
|
39
|
+
// data type for table indices
|
40
|
+
typedef unsigned Index;
|
41
|
+
static const Index undef = (Index)(-1);
|
42
|
+
|
43
|
+
// data type of the generation counter for transducer traversal
|
44
|
+
typedef unsigned short VType;
|
45
|
+
|
46
|
+
extern int Quiet;
|
47
|
+
|
48
|
+
class Node;
|
49
|
+
class Arc;
|
50
|
+
class Arcs;
|
51
|
+
class Transducer;
|
52
|
+
class Node2Int;
|
53
|
+
|
54
|
+
class Transition;
|
55
|
+
|
56
|
+
struct hashf {
|
57
|
+
size_t operator()(const Node *n) const { return (size_t) n; }
|
58
|
+
};
|
59
|
+
typedef hash_set<const Node*, hashf> NodeHashSet;
|
60
|
+
|
61
|
+
/***************** class Arc *************************************/
|
62
|
+
|
63
|
+
class Arc {
|
64
|
+
|
65
|
+
private:
|
66
|
+
Label l;
|
67
|
+
Node *target;
|
68
|
+
Arc *next;
|
69
|
+
|
70
|
+
public:
|
71
|
+
void init( Label ll, Node *node ) { l=ll; target=node; };
|
72
|
+
Label label( void ) const { return l; };
|
73
|
+
Node *target_node( void ) { return target; };
|
74
|
+
const Node *target_node( void ) const { return target; };
|
75
|
+
|
76
|
+
friend class Arcs;
|
77
|
+
friend class ArcsIter;
|
78
|
+
};
|
79
|
+
|
80
|
+
|
81
|
+
/***************** class Arcs ************************************/
|
82
|
+
|
83
|
+
class Arcs {
|
84
|
+
|
85
|
+
private:
|
86
|
+
Arc *first_arcp;
|
87
|
+
Arc *first_epsilon_arcp;
|
88
|
+
|
89
|
+
public:
|
90
|
+
void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
|
91
|
+
Arcs( void ) { init(); };
|
92
|
+
Node *target_node( Label l );
|
93
|
+
const Node *target_node( Label l ) const;
|
94
|
+
void add_arc( Label, Node*, Transducer* );
|
95
|
+
int remove_arc( Arc* );
|
96
|
+
bool is_empty( void ) const {
|
97
|
+
return !(first_arcp || first_epsilon_arcp);
|
98
|
+
};
|
99
|
+
bool epsilon_transition_exists( void ) const {
|
100
|
+
return first_epsilon_arcp != NULL;
|
101
|
+
};
|
102
|
+
bool non_epsilon_transition_exists( void ) const {
|
103
|
+
return first_arcp != NULL;
|
104
|
+
};
|
105
|
+
int size( void ) const;
|
106
|
+
|
107
|
+
friend class ArcsIter;
|
108
|
+
};
|
109
|
+
|
110
|
+
|
111
|
+
/***************** class ArcsIter ********************************/
|
112
|
+
|
113
|
+
class ArcsIter {
|
114
|
+
|
115
|
+
// ArcsIter iterates over the arcs starting with epsilon arcs
|
116
|
+
|
117
|
+
private:
|
118
|
+
Arc *current_arcp;
|
119
|
+
Arc *more_arcs;
|
120
|
+
|
121
|
+
public:
|
122
|
+
typedef enum {all,non_eps,eps} IterType;
|
123
|
+
|
124
|
+
ArcsIter( const Arcs *arcs, IterType type=all ) {
|
125
|
+
more_arcs = NULL;
|
126
|
+
if (type == all) {
|
127
|
+
if (arcs->first_epsilon_arcp) {
|
128
|
+
current_arcp = arcs->first_epsilon_arcp;
|
129
|
+
more_arcs = arcs->first_arcp;
|
130
|
+
}
|
131
|
+
else
|
132
|
+
current_arcp = arcs->first_arcp;
|
133
|
+
}
|
134
|
+
else if (type == non_eps)
|
135
|
+
current_arcp = arcs->first_arcp;
|
136
|
+
else
|
137
|
+
current_arcp = arcs->first_epsilon_arcp;
|
138
|
+
};
|
139
|
+
|
140
|
+
void operator++( int ) {
|
141
|
+
if (current_arcp) {
|
142
|
+
current_arcp = current_arcp->next;
|
143
|
+
if (!current_arcp && more_arcs) {
|
144
|
+
current_arcp = more_arcs;
|
145
|
+
more_arcs = NULL;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
};
|
149
|
+
operator Arc*( void ) const { return current_arcp; };
|
150
|
+
|
151
|
+
};
|
152
|
+
|
153
|
+
|
154
|
+
/***************** class Node ************************************/
|
155
|
+
|
156
|
+
class Node {
|
157
|
+
|
158
|
+
private:
|
159
|
+
Arcs arcsp;
|
160
|
+
Node *forwardp;
|
161
|
+
VType visited;
|
162
|
+
bool final;
|
163
|
+
|
164
|
+
public:
|
165
|
+
Index index;
|
166
|
+
Node( void ) { init(); };
|
167
|
+
void init( void );
|
168
|
+
bool is_final( void ) const { return final; };
|
169
|
+
void set_final( bool flag ) { final = flag; };
|
170
|
+
void set_forward( Node *node ) { forwardp = node; };
|
171
|
+
const Node *target_node( Label l ) const { return arcs()->target_node(l); };
|
172
|
+
Node *target_node( Label l ) { return arcs()->target_node(l); };
|
173
|
+
void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
|
174
|
+
Arcs *arcs( void ) { return &arcsp; };
|
175
|
+
const Arcs *arcs( void ) const { return &arcsp; };
|
176
|
+
Node *forward( void ) { return forwardp; };
|
177
|
+
void clear_visited( NodeHashSet &nodeset );
|
178
|
+
bool was_visited( VType vmark ) {
|
179
|
+
if (visited == vmark)
|
180
|
+
return true;
|
181
|
+
visited = vmark;
|
182
|
+
return false;
|
183
|
+
};
|
184
|
+
bool check_visited( VType vm ) // leaves the visited flag unchanged
|
185
|
+
{ return (visited==vm); };
|
186
|
+
};
|
187
|
+
|
188
|
+
|
189
|
+
/***************** class PairMapping ****************************/
|
190
|
+
|
191
|
+
class PairMapping {
|
192
|
+
// This class is used to map a node pair from two transducers
|
193
|
+
// to a single node in another transducer
|
194
|
+
|
195
|
+
typedef std::pair<Node*, Node*> NodePair;
|
196
|
+
|
197
|
+
private:
|
198
|
+
struct hashf {
|
199
|
+
size_t operator()(const NodePair p) const {
|
200
|
+
return (size_t)p.first ^ (size_t)p.second;
|
201
|
+
}
|
202
|
+
};
|
203
|
+
struct equalf {
|
204
|
+
int operator()(const NodePair p1, const NodePair p2) const {
|
205
|
+
return (p1.first==p2.first && p1.second == p2.second);
|
206
|
+
}
|
207
|
+
};
|
208
|
+
typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
|
209
|
+
PairMap pm;
|
210
|
+
|
211
|
+
public:
|
212
|
+
typedef PairMap::iterator iterator;
|
213
|
+
iterator begin( void ) { return pm.begin(); };
|
214
|
+
iterator end( void ) { return pm.end(); };
|
215
|
+
iterator find( Node *n1, Node *n2 )
|
216
|
+
{ return pm.find( NodePair(n1,n2) ); };
|
217
|
+
Node* &operator[]( NodePair p ) { return pm.operator[](p); };
|
218
|
+
|
219
|
+
};
|
220
|
+
|
221
|
+
|
222
|
+
/***************** class Transducer *******************************/
|
223
|
+
|
224
|
+
class Transducer {
|
225
|
+
|
226
|
+
private:
|
227
|
+
Node root;
|
228
|
+
Mem mem;
|
229
|
+
|
230
|
+
size_t node_count;
|
231
|
+
size_t transition_count;
|
232
|
+
|
233
|
+
typedef set<Label, Label::label_cmp> LabelSet;
|
234
|
+
typedef hash_map<Character, char*> SymbolMap;
|
235
|
+
|
236
|
+
void incr_vmark( void ) {
|
237
|
+
if (++vmark == 0) {
|
238
|
+
NodeHashSet nodes;
|
239
|
+
root.clear_visited( nodes );
|
240
|
+
fprintf(stderr,"clearing flags\n");
|
241
|
+
vmark = 1;
|
242
|
+
}
|
243
|
+
};
|
244
|
+
void reverse_node( Node *old_node, Transducer *new_node );
|
245
|
+
Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
|
246
|
+
Node *copy_nodes( Node *n, Transducer *a,
|
247
|
+
bool lswitch=false, bool recode=false );
|
248
|
+
void rec_cat_nodes( Node*, Node* );
|
249
|
+
void negate_nodes( Node*, Node* );
|
250
|
+
bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
|
251
|
+
void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
|
252
|
+
void freely_insert_at_node( Node *node, Label l );
|
253
|
+
int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
|
254
|
+
bool infinitely_ambiguous_node( Node* );
|
255
|
+
bool is_cyclic_node( Node*, NodeHashSet &visited );
|
256
|
+
bool is_automaton_node( Node* );
|
257
|
+
void store_symbols( Node*, SymbolMap&, LabelSet& );
|
258
|
+
|
259
|
+
void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
|
260
|
+
void splice_arc( Node*, Node*, Node*, Transducer* );
|
261
|
+
void enumerate_paths_node( Node*, vector<Label>&, NodeHashSet&,
|
262
|
+
vector<Transducer*>& );
|
263
|
+
void replace_char2( Node*, Node*, Character, Character, Transducer* );
|
264
|
+
Node *create_node( vector<Node*>&, char*, size_t line );
|
265
|
+
void read_transducer_binary( FILE* );
|
266
|
+
void read_transducer_text( FILE* );
|
267
|
+
|
268
|
+
void build_TT( Node *node, vector<Transition> &transtab );
|
269
|
+
size_t size_node( Node *node );
|
270
|
+
|
271
|
+
void index_nodes( Node*, vector<Node*>* );
|
272
|
+
|
273
|
+
public:
|
274
|
+
VType vmark;
|
275
|
+
bool deterministic;
|
276
|
+
bool minimised;
|
277
|
+
bool indexed;
|
278
|
+
|
279
|
+
Alphabet alphabet; // The set of all labels, i.e. character pairs
|
280
|
+
|
281
|
+
Transducer( bool empty=false ) : root(), mem() {
|
282
|
+
vmark = 0;
|
283
|
+
deterministic = minimised = empty;
|
284
|
+
indexed = false;
|
285
|
+
node_count = transition_count = 0;
|
286
|
+
};
|
287
|
+
|
288
|
+
Transducer( Transducer&, vector<size_t>&, size_t );
|
289
|
+
|
290
|
+
// convertion of a string to an transducer
|
291
|
+
Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
|
292
|
+
// reads a word list from a file and stores it in the transducer
|
293
|
+
Transducer( istream&, const Alphabet *a=NULL, bool verbose=false,
|
294
|
+
bool lexcomments=false );
|
295
|
+
// reads a transducer from a binary or text file
|
296
|
+
Transducer( FILE*, bool binary=true );
|
297
|
+
// turns a sequence of labels into a transducer
|
298
|
+
Transducer( vector<Label>& );
|
299
|
+
|
300
|
+
// HFST additions...
|
301
|
+
Transducer &expand( set<char*> &s );
|
302
|
+
Node *expand_nodes( Node *node, Transducer *a, set<char*> &s );
|
303
|
+
void expand_node( Node *origin, Label &l, Node *target, Transducer *a, set<char*> &s );
|
304
|
+
void copy_nodes( Node *search_node, Transducer *copy_tr,
|
305
|
+
Node *start_node,
|
306
|
+
map<int, Node*> &mapper );
|
307
|
+
Transducer &remove_epsilons();
|
308
|
+
// ...HFST additions end
|
309
|
+
|
310
|
+
Node *root_node( void ) { return &root; }; // returns the root node
|
311
|
+
const Node *root_node( void ) const { return &root; }; // returns the root node
|
312
|
+
Node *new_node( void ); // memory alocation for a new node
|
313
|
+
Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
|
314
|
+
void add_string( char *s, bool extended=false, Alphabet *a=NULL );
|
315
|
+
void complete_alphabet( void );
|
316
|
+
void minimise_alphabet( void );
|
317
|
+
std::pair<size_t,size_t> nodeindexing( vector<Node*> *nodearray=NULL );
|
318
|
+
|
319
|
+
int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
|
320
|
+
|
321
|
+
bool analyze_string( char *s, FILE *file, bool with_brackets=true );
|
322
|
+
bool generate_string( char *s, FILE *file, bool with_brackets=true );
|
323
|
+
void generate( FILE *file, int max=-1, OutputType ot=Joint );
|
324
|
+
|
325
|
+
void clear( void ); // clears the transducer. The resulting transducer
|
326
|
+
// is like one created with Transducer()
|
327
|
+
// copy duplicates a transducer
|
328
|
+
// if called with a non-zero first argument, upper and lower level are switched
|
329
|
+
// if called with an alphabet as second argument, the label encoding
|
330
|
+
// of the second argument is transferred to the transducer copy
|
331
|
+
Transducer ©( bool lswitch=false, const Alphabet *al=NULL );
|
332
|
+
Transducer &switch_levels( void ) { return copy( true ); };
|
333
|
+
Transducer &splice( Label l, Transducer *a);
|
334
|
+
Transducer &freely_insert( Label l );
|
335
|
+
Transducer &replace_char( Character c, Character nc );
|
336
|
+
Transducer &level( Level );
|
337
|
+
Transducer &lower_level( void ) // creates an transducer for the "lower" language
|
338
|
+
{ return level(lower); };
|
339
|
+
Transducer &upper_level( void ) // creates an transducer for the "upper" language
|
340
|
+
{ return level(upper); };
|
341
|
+
Transducer &determinise( bool copy_alphabet=true ); // creates a deterministic transducer
|
342
|
+
Transducer &minimise( bool verbose=true );
|
343
|
+
void store( FILE* ); // stores the transducer in binary format
|
344
|
+
void store_lowmem( FILE* );
|
345
|
+
void read( FILE* ); // reads an transducer in binary format
|
346
|
+
bool enumerate_paths( vector<Transducer*>& );
|
347
|
+
|
348
|
+
size_t size();
|
349
|
+
|
350
|
+
void build_transtab( vector<Transition> &transtab );
|
351
|
+
|
352
|
+
Transducer &reverse( bool copy_alphabet=true ); // reverse language
|
353
|
+
Transducer &operator|( Transducer& ); // union, disjunction
|
354
|
+
Transducer &operator+( Transducer& ); // concatenation
|
355
|
+
Transducer &operator/( Transducer& ); // subtraction
|
356
|
+
Transducer &operator&( Transducer& ); // intersection, conjunction
|
357
|
+
Transducer &operator||( Transducer& ); // composition
|
358
|
+
Transducer &operator!( void ); // complement, negation
|
359
|
+
Transducer &kleene_star( void );
|
360
|
+
bool operator==( Transducer& ); // minimises its arguments first
|
361
|
+
|
362
|
+
bool is_cyclic( void );
|
363
|
+
bool is_automaton( void );
|
364
|
+
bool is_infinitely_ambiguous( void );
|
365
|
+
bool is_empty( void ); // For efficiency reasons, these functions
|
366
|
+
bool generates_empty_string( void );// are better called after minimisation
|
367
|
+
|
368
|
+
friend class EdgeCount;
|
369
|
+
friend class MakeCompactTransducer;
|
370
|
+
friend class Minimiser;
|
371
|
+
friend ostream &operator<<(ostream&, Transducer&);
|
372
|
+
};
|
373
|
+
}
|
374
|
+
#endif
|
@@ -0,0 +1,681 @@
|
|
1
|
+
|
2
|
+
/*******************************************************************/
|
3
|
+
/* */
|
4
|
+
/* FILE hopcroft.C */
|
5
|
+
/* MODULE hopcroft */
|
6
|
+
/* PROGRAM SFST */
|
7
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
8
|
+
/* */
|
9
|
+
/*******************************************************************/
|
10
|
+
|
11
|
+
#include "fst.h"
|
12
|
+
|
13
|
+
// HFST
|
14
|
+
namespace SFST
|
15
|
+
{
|
16
|
+
|
17
|
+
|
18
|
+
/*******************************************************************/
|
19
|
+
/* */
|
20
|
+
/* Transducer::rev_det_minimise */
|
21
|
+
/* */
|
22
|
+
/*******************************************************************/
|
23
|
+
|
24
|
+
#if 0
|
25
|
+
// alternative less efficient minimisation algorithm
|
26
|
+
Transducer &Transducer::rev_det_minimise( bool verbose )

{
  // nothing to do for a transducer which is already minimal
  if (minimised)
    return copy();

  // first pass: reverse, then determinise
  Transducer *reversed = &reverse();
  Transducer *det = &reversed->determinise();
  delete reversed;

  // second pass: reverse again, then determinise again
  reversed = &det->reverse();
  delete det;
  det = &reversed->determinise();
  delete reversed;

  det->minimised = true;
  det->minimise_alphabet();

  return *det;
}
|
49
|
+
#endif
|
50
|
+
|
51
|
+
|
52
|
+
/***************** class Minimiser *****************************/
|
53
|
+
|
54
|
+
class Minimiser {
|
55
|
+
|
56
|
+
|
57
|
+
/***************** class Transition **************************/
|
58
|
+
|
59
|
+
class Transition {
|
60
|
+
|
61
|
+
public:
|
62
|
+
Index source;
|
63
|
+
Index next_for_target;
|
64
|
+
Index next_for_label;
|
65
|
+
Label label;
|
66
|
+
|
67
|
+
Transition( Index s, Label l, Index n ) {
|
68
|
+
source = s;
|
69
|
+
label = l;
|
70
|
+
next_for_target = n;
|
71
|
+
next_for_label = undef;
|
72
|
+
}
|
73
|
+
};
|
74
|
+
|
75
|
+
|
76
|
+
/***************** class State *******************************/
|
77
|
+
|
78
|
+
class State {
|
79
|
+
|
80
|
+
public:
|
81
|
+
Index group; // index of group to which this state belongs
|
82
|
+
Index next_in_group; // index of next state in group
|
83
|
+
Index previous_in_group; // index of previous state in group
|
84
|
+
Index first_transition; // index of first transition with this
|
85
|
+
// state as target
|
86
|
+
|
87
|
+
State() {
|
88
|
+
group = next_in_group = previous_in_group = undef;
|
89
|
+
first_transition = undef;
|
90
|
+
}
|
91
|
+
};
|
92
|
+
|
93
|
+
|
94
|
+
/***************** class StateGroup **************************/
|
95
|
+
|
96
|
+
class StateGroup {
|
97
|
+
|
98
|
+
public:
|
99
|
+
Index next; // index of next source group
|
100
|
+
Index next_in_agenda;
|
101
|
+
Index previous_in_agenda;
|
102
|
+
|
103
|
+
Index size; // number of states in this group
|
104
|
+
Index first_state; // pointer to first state
|
105
|
+
|
106
|
+
Index new_size;
|
107
|
+
Index first_new_state; // pointer to the set of intersection states
|
108
|
+
|
109
|
+
void init( Index i ) {
|
110
|
+
next_in_agenda = i;
|
111
|
+
size = new_size = 0;
|
112
|
+
next = first_state = first_new_state = undef;
|
113
|
+
}
|
114
|
+
bool is_empty() {
|
115
|
+
return first_state == undef;
|
116
|
+
}
|
117
|
+
};
|
118
|
+
|
119
|
+
|
120
|
+
/***************** class Agenda *****************************/
|
121
|
+
|
122
|
+
class Agenda {
|
123
|
+
|
124
|
+
static const Index bucket_count = (Index)(sizeof(Index) * 8);
|
125
|
+
// the first "bucket_count" many groups are dummy groups
|
126
|
+
// used as the agenda buckets
|
127
|
+
|
128
|
+
vector<StateGroup> &group;
|
129
|
+
|
130
|
+
public:
|
131
|
+
|
132
|
+
Agenda( vector<StateGroup> &g ) : group(g) {
|
133
|
+
// allocate some dummy groups for the agenda
|
134
|
+
g.resize(bucket_count);
|
135
|
+
for( Index i=0; i<bucket_count; i++ )
|
136
|
+
group[i].next_in_agenda = group[i].previous_in_agenda = i;
|
137
|
+
}
|
138
|
+
|
139
|
+
Index pop() {
|
140
|
+
for( Index i=0; i<bucket_count; i++ ) {
|
141
|
+
if (group[i].next_in_agenda != i) {
|
142
|
+
Index result = group[i].next_in_agenda;
|
143
|
+
erase( result );
|
144
|
+
return result;
|
145
|
+
}
|
146
|
+
}
|
147
|
+
return undef;
|
148
|
+
}
|
149
|
+
|
150
|
+
void add( Index g, Index size ) {
|
151
|
+
|
152
|
+
// find the bucket
|
153
|
+
Index i;
|
154
|
+
for( i=0; (size >>= 1); i++ ) ;
|
155
|
+
|
156
|
+
// insert the new group
|
157
|
+
Index next = group[i].next_in_agenda;
|
158
|
+
group[i].next_in_agenda = g;
|
159
|
+
group[g].next_in_agenda = next;
|
160
|
+
group[g].previous_in_agenda = i;
|
161
|
+
group[next].previous_in_agenda = g;
|
162
|
+
}
|
163
|
+
|
164
|
+
void erase( Index g ) {
|
165
|
+
// update the pointers
|
166
|
+
Index next = group[g].next_in_agenda;
|
167
|
+
Index previous = group[g].previous_in_agenda;
|
168
|
+
group[previous].next_in_agenda = next;
|
169
|
+
group[next].previous_in_agenda = previous;
|
170
|
+
|
171
|
+
// unlink the result element
|
172
|
+
group[g].previous_in_agenda = group[g].next_in_agenda = g;
|
173
|
+
}
|
174
|
+
|
175
|
+
bool contains( Index g ) {
|
176
|
+
return (group[g].next_in_agenda != g);
|
177
|
+
}
|
178
|
+
|
179
|
+
Index number_of_buckets() { return bucket_count; }
|
180
|
+
};
|
181
|
+
|
182
|
+
|
183
|
+
/***************************************************************/
|
184
|
+
|
185
|
+
|
186
|
+
Transducer &transducer; // pointer to original transducer
|
187
|
+
size_t number_of_nodes; // node count in original t.
|
188
|
+
size_t number_of_transitions; // transition count in original t.
|
189
|
+
vector<Node*> nodearray; // maps indices to original transducer nodes
|
190
|
+
|
191
|
+
// CAVEAT: Do not use references to elements of the group vector
|
192
|
+
// because they become invalid when the group vector is resized.
|
193
|
+
vector<StateGroup> group;
|
194
|
+
vector<State> state;
|
195
|
+
vector<Transition> transition;
|
196
|
+
Agenda agenda;
|
197
|
+
|
198
|
+
// data structure for the sets of incoming transitions
|
199
|
+
typedef map<Label,Index> Label2TransSet;
|
200
|
+
|
201
|
+
// "first_transition_for_label" maps a label to a list of transitions
|
202
|
+
// to (states in) C that are labelled with the respective label
|
203
|
+
Label2TransSet first_transition_for_label;
|
204
|
+
|
205
|
+
Index first_source_group; // linked list of source groups
|
206
|
+
|
207
|
+
public:
|
208
|
+
Minimiser( Transducer &t );
|
209
|
+
Transducer &result();
|
210
|
+
|
211
|
+
private:
|
212
|
+
// transform the transducer to the representation needed for minimisation
|
213
|
+
void add_transition( Index s, Label l, Index t );
|
214
|
+
void link_state_in( Index &first_state, Index s );
|
215
|
+
void add_state( Index g, Index s );
|
216
|
+
void link_state_out( Index &first_state, Index s );
|
217
|
+
void remove_state( Index g, Index s );
|
218
|
+
void move_state_to_new( Index g, Index s );
|
219
|
+
void merge_state_lists( Index g );
|
220
|
+
|
221
|
+
void compute_source_states( Index g );
|
222
|
+
void process_source_groups( Label l );
|
223
|
+
void split( Index g, Label l );
|
224
|
+
|
225
|
+
Index first_group() { return agenda.number_of_buckets(); }
|
226
|
+
|
227
|
+
Transducer &build_transducer();
|
228
|
+
|
229
|
+
#if 0
|
230
|
+
void print_groups() {
|
231
|
+
fputs("--------------\n", stderr);
|
232
|
+
for( size_t g=first_group(); g<group.size(); g++ ) {
|
233
|
+
fprintf(stderr,"group %lu: ", (unsigned long)g-first_group());
|
234
|
+
if (group[g].first_state != undef) {
|
235
|
+
Index s = group[g].first_state;
|
236
|
+
do {
|
237
|
+
fprintf(stderr,"%lu ", (unsigned long)s);
|
238
|
+
s = state[s].next_in_group;
|
239
|
+
} while (s != group[g].first_state);
|
240
|
+
}
|
241
|
+
if (group[g].first_new_state != undef) {
|
242
|
+
fputs("| ", stderr);
|
243
|
+
Index s = group[g].first_new_state;
|
244
|
+
do {
|
245
|
+
fprintf(stderr,"%lu ", (unsigned long)s);
|
246
|
+
s = state[s].next_in_group;
|
247
|
+
} while (s != group[g].first_new_state);
|
248
|
+
}
|
249
|
+
fputc('\n', stderr);
|
250
|
+
}
|
251
|
+
}
|
252
|
+
#endif
|
253
|
+
};
|
254
|
+
|
255
|
+
|
256
|
+
/*******************************************************************/
|
257
|
+
/* */
|
258
|
+
/* Transducer::minimise */
|
259
|
+
/* */
|
260
|
+
/*******************************************************************/
|
261
|
+
|
262
|
+
Transducer &Transducer::minimise( bool verbose )

{
  // a transducer which is already minimal is simply duplicated
  if (minimised)
    return copy();

  // reverse the transducer twice (without copying the alphabet) ...
  Transducer *reversed = &reverse( false );
  Transducer *restored = &reversed->reverse( false );
  delete reversed;

  // ... and make the result deterministic
  Transducer *det = &restored->determinise( false );
  delete restored;

  // run the minimiser on the deterministic transducer
  Transducer *minimal = &Minimiser( *det ).result();
  delete det;

  // the intermediate transducers were built without alphabet,
  // so the alphabet of this transducer is copied to the result
  minimal->minimised = true;
  minimal->alphabet.copy(alphabet);
  minimal->minimise_alphabet();

  return *minimal;
}
|
283
|
+
|
284
|
+
|
285
|
+
/*******************************************************************/
|
286
|
+
/* */
|
287
|
+
/* Minimiser::Minimiser */
|
288
|
+
/* */
|
289
|
+
/*******************************************************************/
|
290
|
+
|
291
|
+
Minimiser::Minimiser( Transducer &t )
  : transducer(t), agenda(group)

{
  // index the nodes; the result holds the node and the transition count
  const std::pair<size_t, size_t> counts = t.nodeindexing( &nodearray );
  number_of_nodes = counts.first;
  number_of_transitions = counts.second;

  state.resize(number_of_nodes);
  transition.reserve(number_of_transitions);
  group.reserve(number_of_nodes+first_group());

  // create two initial groups behind the agenda buckets:
  // one for the final states ...
  const Index final = (Index)group.size();
  group.push_back( StateGroup() );
  group.back().init( final );

  // ... and one for the non-final states
  const Index nonfinal = (Index)group.size();
  group.push_back( StateGroup() );
  group.back().init( nonfinal );

  // build the state and transition tables
  const Index node_total = (Index)nodearray.size();
  for( Index sourceID=0; sourceID<node_total; sourceID++ ) {
    Node *node = nodearray[sourceID];

    add_state( node->is_final() ? final : nonfinal, sourceID );

    // one table entry for each outgoing arc of the node
    ArcsIter p(node->arcs());
    while (Arc *arc = p) {
      add_transition( sourceID, arc->label(), arc->target_node()->index );
      p++;
    }
  }
}
|
328
|
+
|
329
|
+
|
330
|
+
/*******************************************************************/
|
331
|
+
/* */
|
332
|
+
/* Minimiser::link_state_in */
|
333
|
+
/* */
|
334
|
+
/*******************************************************************/
|
335
|
+
|
336
|
+
void Minimiser::link_state_in( Index &first_state, Index s )

{
  // empty list: s becomes the only element and is linked to itself
  if (first_state == undef) {
    state[s].previous_in_group = s;
    state[s].next_in_group = s;
    first_state = s;
    return;
  }

  // otherwise s is inserted directly behind the first element
  const Index head = first_state;
  const Index successor = state[head].next_in_group;
  state[head].next_in_group = s;
  state[s].next_in_group = successor;
  state[successor].previous_in_group = s;
  state[s].previous_in_group = head;
}
|
351
|
+
|
352
|
+
|
353
|
+
/*******************************************************************/
|
354
|
+
/* */
|
355
|
+
/* Minimiser::add_state */
|
356
|
+
/* */
|
357
|
+
/*******************************************************************/
|
358
|
+
|
359
|
+
void Minimiser::add_state( Index g, Index s )

{
  // count the state, record its group and link it into the state list
  group[g].size += 1;
  state[s].group = g;
  link_state_in( group[g].first_state, s );
}
|
366
|
+
|
367
|
+
|
368
|
+
/*******************************************************************/
|
369
|
+
/* */
|
370
|
+
/* Minimiser::link_state_out */
|
371
|
+
/* */
|
372
|
+
/*******************************************************************/
|
373
|
+
|
374
|
+
void Minimiser::link_state_out( Index &first_state, Index s )

{
  const Index successor = state[s].next_in_group;

  // s is linked to itself, i.e. it is the only state: the list gets empty
  if (successor == s) {
    first_state = undef;
    return;
  }

  // connect the neighbours of s with each other
  const Index predecessor = state[s].previous_in_group;
  state[predecessor].next_in_group = successor;
  state[successor].previous_in_group = predecessor;

  // the list must not start with the removed state
  if (first_state == s)
    first_state = successor;
}
|
390
|
+
|
391
|
+
|
392
|
+
/*******************************************************************/
|
393
|
+
/* */
|
394
|
+
/* Minimiser::remove_state */
|
395
|
+
/* */
|
396
|
+
/*******************************************************************/
|
397
|
+
|
398
|
+
void Minimiser::remove_state( Index g, Index s )

{
  // one state less in the group; take s out of the group's state list
  group[g].size -= 1;
  link_state_out( group[g].first_state, s );
}
|
404
|
+
|
405
|
+
|
406
|
+
/*******************************************************************/
|
407
|
+
/* */
|
408
|
+
/* Minimiser::move_state_to_new */
|
409
|
+
/* */
|
410
|
+
/*******************************************************************/
|
411
|
+
|
412
|
+
void Minimiser::move_state_to_new( Index g, Index s )

{
  // adjust the counters of the two state lists of group g
  group[g].new_size += 1;
  group[g].size -= 1;

  // s has to be unlinked from the old list before it is linked
  // into the list of new states
  link_state_out( group[g].first_state, s );
  link_state_in( group[g].first_new_state, s );
}
|
421
|
+
|
422
|
+
|
423
|
+
/*******************************************************************/
|
424
|
+
/* */
|
425
|
+
/* Minimiser::merge_state_lists */
|
426
|
+
/* */
|
427
|
+
/*******************************************************************/
|
428
|
+
|
429
|
+
// Moves the states from the "new" list of group g back into its regular
// state list and resets the "new" list (head and counter).
void Minimiser::merge_state_lists( Index g )

{
  const Index first1 = group[g].first_state;
  const Index first2 = group[g].first_new_state;

  if (first1 == undef)
    // the regular list is empty: the new list simply takes its place
    group[g].first_state = first2;
  else if (first2 != undef) {
    // both lists are non-empty circular lists: splice them together by
    // exchanging the successors of their first elements.
    // (If the new list is empty there is nothing to splice, and indexing
    // the state table with undef has to be avoided.)
    const Index next1 = state[first1].next_in_group;
    const Index next2 = state[first2].next_in_group;
    state[first1].next_in_group = next2;
    state[first2].next_in_group = next1;
    state[next1].previous_in_group = first2;
    state[next2].previous_in_group = first1;
  }

  group[g].first_new_state = undef;
  group[g].size += group[g].new_size;
  group[g].new_size = 0;
}
|
448
|
+
|
449
|
+
|
450
|
+
/*******************************************************************/
|
451
|
+
/* */
|
452
|
+
/* Minimiser::add_transition */
|
453
|
+
/* */
|
454
|
+
/*******************************************************************/
|
455
|
+
|
456
|
+
void Minimiser::add_transition( Index s, Label l, Index t )

{
  // The new transition is stored at the end of the transition table and
  // becomes the head of the list of transitions into state t; the former
  // head is its successor in that list.
  const Index position = (Index)transition.size();
  transition.push_back( Transition( s, l, state[t].first_transition ) );
  state[t].first_transition = position;
}
|
463
|
+
|
464
|
+
|
465
|
+
/*******************************************************************/
|
466
|
+
/* */
|
467
|
+
/* Minimiser::result */
|
468
|
+
/* */
|
469
|
+
/*******************************************************************/
|
470
|
+
|
471
|
+
Transducer &Minimiser::result()

{
  // a transducer with a single node needs no minimisation
  if (number_of_nodes == 1)
    return transducer.copy();

  const Index final = first_group();
  const Index nonfinal = final + 1;

  // without final states the result is an empty transducer
  if (group[final].is_empty())
    return *new Transducer( true );

  // put the initial groups on the agenda; an empty group of
  // non-final states is discarded instead
  const bool no_nonfinal_states = group[nonfinal].is_empty();
  if (no_nonfinal_states)
    group.pop_back();
  agenda.add(final, group[final].size);
  if (!no_nonfinal_states)
    agenda.add(nonfinal, group[nonfinal].size);

  // process the groups on the agenda until it is empty
  for( Index g=agenda.pop(); g!=undef; g=agenda.pop() ) {

    compute_source_states( g );

    // for all labels appearing on incoming transitions
    Label2TransSet::iterator it = first_transition_for_label.begin();
    while (it != first_transition_for_label.end()) {
      process_source_groups( it->first );
      ++it;
    }

    // stop as soon as there are as many groups as nodes
    if (group.size() - first_group() == number_of_nodes)
      break;
  }

  return build_transducer();
}
|
511
|
+
|
512
|
+
|
513
|
+
/*******************************************************************/
|
514
|
+
/* */
|
515
|
+
/* Minimiser::compute_source_states */
|
516
|
+
/* */
|
517
|
+
/*******************************************************************/
|
518
|
+
|
519
|
+
// Rebuilds first_transition_for_label for group g: afterwards each label
// that occurs on a transition into some state of g maps to the head of a
// list (chained through next_for_label) of all such transitions.
void Minimiser::compute_source_states( Index g )
{
  first_transition_for_label.clear();

  // Walk the state list of the group until we are back at its first state.
  const Index head = group[g].first_state;
  Index cur = head;
  do {
    State &S = state[cur];

    // Visit every transition into S.
    Index t = S.first_transition;
    while (t != undef) {
      Transition &T = transition[t];

      Label2TransSet::iterator pos = first_transition_for_label.find( T.label );
      if (pos != first_transition_for_label.end()) {
        // Label already seen: prepend this transition to its list.
        T.next_for_label = pos->second;
        pos->second = t;
      }
      else {
        // First transition with this label: start a new one-element list.
        T.next_for_label = undef;
        first_transition_for_label[T.label] = t;
      }

      t = T.next_for_target;
    }

    cur = S.next_in_group;
  }
  while (cur != head);
}
|
551
|
+
|
552
|
+
|
553
|
+
/*******************************************************************/
|
554
|
+
/* */
|
555
|
+
/* Minimiser::process_source_groups */
|
556
|
+
/* */
|
557
|
+
/*******************************************************************/
|
558
|
+
|
559
|
+
// Handles all transitions with label l recorded in
// first_transition_for_label: every source state is moved to the "new"
// list of its group, and each affected group is then either split or has
// its two lists merged back together.
void Minimiser::process_source_groups( Label l )
{
  first_source_group = undef;

  // Pass 1: for all incoming transitions with label l.
  // NOTE(review): operator[] is used for the lookup, so l is expected to
  // be a key that is already present in the map.
  Index t = first_transition_for_label[l];
  while (t != undef) {
    Transition &T = transition[t];
    const Index g = state[T.source].group;

    // A group without new states has not been seen yet in this pass:
    // push it onto the list of source groups.
    if (group[g].first_new_state == undef) {
      group[g].next = first_source_group;
      first_source_group = g;
    }

    move_state_to_new( g, T.source );

    t = transition[t].next_for_label;
  }

  // Pass 2: for all source groups.
  Index g = first_source_group;
  while (g != undef) {
    if (group[g].size > 0)
      split( g, l );            // some states stayed behind: real split
    else
      merge_state_lists( g );   // every state was moved: undo the move
    g = group[g].next;
  }
}
|
591
|
+
|
592
|
+
|
593
|
+
|
594
|
+
/*******************************************************************/
|
595
|
+
/* */
|
596
|
+
/* Minimiser::split */
|
597
|
+
/* */
|
598
|
+
/*******************************************************************/
|
599
|
+
|
600
|
+
// Turns the "new" state list of group g into a group of its own and
// updates the agenda accordingly. The label parameter l is not used here.
void Minimiser::split( Index g, Label l )
{
  // Create the new group; references are taken only after the push_back.
  const Index newg = static_cast<Index>( group.size() );
  group.push_back( StateGroup() );
  StateGroup &fresh = group[newg];
  StateGroup &old = group[g];

  // Hand the new-state list of g over to the new group.
  fresh.init( newg );
  fresh.first_state = old.first_new_state;
  fresh.size = old.new_size;
  old.first_new_state = undef;
  old.new_size = 0;

  // Relabel every state of the new group.
  Index s = fresh.first_state;
  do {
    State &S = state[s];
    S.group = newg;
    s = S.next_in_group;
  }
  while (s != fresh.first_state);

  // Update the agenda.
  if (agenda.contains( g )) {
    // g was on the agenda: re-enter it with its current size and
    // queue the new group as well.
    agenda.erase( g );
    agenda.add( g, old.size );
    agenda.add( newg, fresh.size );
  }
  else {
    // Otherwise only the smaller of the two groups is queued
    // (the new group wins a tie).
    const Index smaller = (old.size < fresh.size) ? g : newg;
    agenda.add( smaller, group[smaller].size );
  }
}
|
636
|
+
|
637
|
+
|
638
|
+
/*******************************************************************/
|
639
|
+
/* */
|
640
|
+
/* Minimiser::build_transducer */
|
641
|
+
/* */
|
642
|
+
/*******************************************************************/
|
643
|
+
|
644
|
+
// Builds a new transducer with one node per state group. Each new node
// takes its finality and its outgoing arcs from the first state of its
// group, with arc targets redirected to the node of the target's group.
// The transducer is allocated with `new` and returned by reference.
Transducer &Minimiser::build_transducer()
{
  Transducer &minimal = *new Transducer( true );
  minimal.alphabet.copy( transducer.alphabet );

  // One node slot per group; the group of state 0 gets the root node.
  vector<Node*> group_node( group.size(), NULL );
  group_node[state[0].group] = minimal.root_node();

  // Create nodes for all remaining groups.
  for( size_t i = first_group(); i < group_node.size(); ++i ) {
    if (group_node[i] == NULL)
      group_node[i] = minimal.new_node();
  }

  // Copy finality and arcs from a representative state of each group.
  for( size_t g = first_group(); g < group.size(); ++g ) {
    Node *representative = nodearray[group[g].first_state];
    Node *merged = group_node[g];
    merged->set_final( representative->is_final() );

    ArcsIter p( representative->arcs() );
    while (p) {
      Arc *arc = p;
      // Index of the target state in the original transducer
      Index target_state = (Index)arc->target_node()->index;
      // Node that stands for the target state's group
      Node *target = group_node[state[target_state].group];
      merged->add_arc( arc->label(), target, &minimal );
      p++;
    }
  }

  return minimal;
}
|
680
|
+
|
681
|
+
}
|