ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -0,0 +1,374 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE fst.h */
4
+ /* MODULE fst */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE finite state tools */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #ifndef _FST_H_
13
+ #define _FST_H_
14
+
15
+ #include "alphabet.h"
16
+
17
+ typedef enum { Joint, UpperOnly, LowerOnly, Both } OutputType;
18
+
19
+
20
+ /*******************************************************************/
21
+ /* include commands */
22
+ /*******************************************************************/
23
+
24
+ #include <string>
25
+ #include <vector>
26
+ #include <map>
27
+ #include <set>
28
+
29
+ using std::map;
30
+ using std::set;
31
+ using std::vector;
32
+ using std::istream;
33
+ using std::ostream;
34
+
35
+ #include "mem.h"
36
+
37
+ namespace SFST {
38
+
39
+ // data type for table indices
40
+ typedef unsigned Index;
41
+ static const Index undef = (Index)(-1);
42
+
43
+ // data type of the generation counter for transducer traversal
44
+ typedef unsigned short VType;
45
+
46
+ extern int Quiet;
47
+
48
+ class Node;
49
+ class Arc;
50
+ class Arcs;
51
+ class Transducer;
52
+ class Node2Int;
53
+
54
+ class Transition;
55
+
56
+ struct hashf { // hash functor for Node pointers: the hash value is the pointer value itself
57
+ size_t operator()(const Node *n) const { return (size_t) n; }
58
+ };
59
+ typedef hash_set<const Node*, hashf> NodeHashSet;
60
+
61
+ /***************** class Arc *************************************/
62
+
63
+ class Arc {
64
+ // A single labelled transition to a target node; arcs are chained into singly linked lists via "next"
65
+ private:
66
+ Label l; // label of the transition
67
+ Node *target; // node this arc leads to
68
+ Arc *next; // next arc in the same list (followed by ArcsIter)
69
+
70
+ public:
71
+ void init( Label ll, Node *node ) { l=ll; target=node; }; // sets label and target; "next" is not touched here
72
+ Label label( void ) const { return l; };
73
+ Node *target_node( void ) { return target; };
74
+ const Node *target_node( void ) const { return target; };
75
+
76
+ friend class Arcs;
77
+ friend class ArcsIter;
78
+ };
79
+
80
+
81
+ /***************** class Arcs ************************************/
82
+
83
+ class Arcs {
84
+ // The arcs leaving a node, kept in two separate lists: epsilon arcs and all other arcs
85
+ private:
86
+ Arc *first_arcp; // head of the list of non-epsilon arcs (NULL if there are none)
87
+ Arc *first_epsilon_arcp; // head of the list of epsilon arcs (NULL if there are none)
88
+
89
+ public:
90
+ void init( void ) { first_arcp = first_epsilon_arcp = NULL; }; // makes both lists empty
91
+ Arcs( void ) { init(); };
92
+ Node *target_node( Label l );
93
+ const Node *target_node( Label l ) const;
94
+ void add_arc( Label, Node*, Transducer* );
95
+ int remove_arc( Arc* );
96
+ bool is_empty( void ) const { // true if neither list contains an arc
97
+ return !(first_arcp || first_epsilon_arcp);
98
+ };
99
+ bool epsilon_transition_exists( void ) const {
100
+ return first_epsilon_arcp != NULL;
101
+ };
102
+ bool non_epsilon_transition_exists( void ) const {
103
+ return first_arcp != NULL;
104
+ };
105
+ int size( void ) const;
106
+
107
+ friend class ArcsIter;
108
+ };
109
+
110
+
111
+ /***************** class ArcsIter ********************************/
112
+
113
+ class ArcsIter {
114
+
115
+ // ArcsIter iterates over the arcs starting with epsilon arcs
116
+ // Typical use: for( ArcsIter p(node->arcs()); p; p++ ) { Arc *arc=p; ... }
117
+ private:
118
+ Arc *current_arcp; // arc the iterator currently points to; NULL when the iteration is finished
119
+ Arc *more_arcs; // second list to continue with once the current one is exhausted (only set for type "all")
120
+
121
+ public:
122
+ typedef enum {all,non_eps,eps} IterType; // which arcs to visit: both lists, only non-epsilon arcs, only epsilon arcs
123
+
124
+ ArcsIter( const Arcs *arcs, IterType type=all ) {
125
+ more_arcs = NULL;
126
+ if (type == all) {
127
+ if (arcs->first_epsilon_arcp) {
128
+ current_arcp = arcs->first_epsilon_arcp; // start with the epsilon arcs ...
129
+ more_arcs = arcs->first_arcp; // ... and continue with the other arcs afterwards
130
+ }
131
+ else
132
+ current_arcp = arcs->first_arcp;
133
+ }
134
+ else if (type == non_eps)
135
+ current_arcp = arcs->first_arcp;
136
+ else
137
+ current_arcp = arcs->first_epsilon_arcp;
138
+ };
139
+
140
+ void operator++( int ) { // advances to the next arc; returns nothing, so the result of p++ cannot be used
141
+ if (current_arcp) {
142
+ current_arcp = current_arcp->next;
143
+ if (!current_arcp && more_arcs) { // first list exhausted: switch to the pending second list
144
+ current_arcp = more_arcs;
145
+ more_arcs = NULL;
146
+ }
147
+ }
148
+ };
149
+ operator Arc*( void ) const { return current_arcp; }; // yields the current arc; NULL ends a loop condition
150
+
151
+ };
152
+
153
+
154
+ /***************** class Node ************************************/
155
+
156
+ class Node {
157
+ // A transducer state: its outgoing arcs, a final flag, a forward pointer and a traversal mark
158
+ private:
159
+ Arcs arcsp; // outgoing arcs of this node
160
+ Node *forwardp; // pointer set with set_forward() and read with forward()
161
+ VType visited; // traversal mark, compared with a generation value in was_visited()
162
+ bool final; // true if this node is a final state
163
+
164
+ public:
165
+ Index index; // node number; read by the Minimiser via arc->target_node()->index
166
+ Node( void ) { init(); };
167
+ void init( void );
168
+ bool is_final( void ) const { return final; };
169
+ void set_final( bool flag ) { final = flag; };
170
+ void set_forward( Node *node ) { forwardp = node; };
171
+ const Node *target_node( Label l ) const { return arcs()->target_node(l); };
172
+ Node *target_node( Label l ) { return arcs()->target_node(l); };
173
+ void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
174
+ Arcs *arcs( void ) { return &arcsp; };
175
+ const Arcs *arcs( void ) const { return &arcsp; };
176
+ Node *forward( void ) { return forwardp; };
177
+ void clear_visited( NodeHashSet &nodeset );
178
+ bool was_visited( VType vmark ) { // returns true if already marked with vmark; otherwise marks the node and returns false
179
+ if (visited == vmark)
180
+ return true;
181
+ visited = vmark;
182
+ return false;
183
+ };
184
+ bool check_visited( VType vm ) // leaves the visited flag unchanged
185
+ { return (visited==vm); };
186
+ };
187
+
188
+
189
+ /***************** class PairMapping ****************************/
190
+
191
+ class PairMapping {
192
+ // This class is used to map a node pair from two transducers
193
+ // to a single node in another transducer
194
+
195
+ typedef std::pair<Node*, Node*> NodePair;
196
+
197
+ private:
198
+ struct hashf { // hash functor for node pairs
199
+ size_t operator()(const NodePair p) const {
200
+ return (size_t)p.first ^ (size_t)p.second; // XOR of both pointer values; (a,b) and (b,a) get the same hash
201
+ }
202
+ };
203
+ struct equalf { // equality functor: both components must be the same pointers
204
+ int operator()(const NodePair p1, const NodePair p2) const {
205
+ return (p1.first==p2.first && p1.second == p2.second);
206
+ }
207
+ };
208
+ typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
209
+ PairMap pm; // the underlying hash map from node pairs to nodes
210
+
211
+ public:
212
+ typedef PairMap::iterator iterator;
213
+ iterator begin( void ) { return pm.begin(); };
214
+ iterator end( void ) { return pm.end(); };
215
+ iterator find( Node *n1, Node *n2 ) // looks up the pair (n1,n2); forwards to PairMap::find
216
+ { return pm.find( NodePair(n1,n2) ); };
217
+ Node* &operator[]( NodePair p ) { return pm.operator[](p); }; // forwards to PairMap::operator[]
218
+
219
+ };
220
+
221
+
222
+ /***************** class Transducer *******************************/
223
+
224
+ class Transducer {
225
+ // A finite-state transducer: a root node, the nodes and arcs obtained through new_node()/new_arc(), and its alphabet
226
+ private:
227
+ Node root; // start node of the transducer (see root_node())
228
+ Mem mem;
229
+
230
+ size_t node_count;
231
+ size_t transition_count;
232
+
233
+ typedef set<Label, Label::label_cmp> LabelSet;
234
+ typedef hash_map<Character, char*> SymbolMap;
235
+ // incr_vmark starts a new traversal generation; when the VType counter wraps around to 0, clear_visited() is run from the root and counting restarts at 1
236
+ void incr_vmark( void ) {
237
+ if (++vmark == 0) {
238
+ NodeHashSet nodes;
239
+ root.clear_visited( nodes );
240
+ fprintf(stderr,"clearing flags\n");
241
+ vmark = 1;
242
+ }
243
+ };
244
+ void reverse_node( Node *old_node, Transducer *new_node );
245
+ Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
246
+ Node *copy_nodes( Node *n, Transducer *a,
247
+ bool lswitch=false, bool recode=false );
248
+ void rec_cat_nodes( Node*, Node* );
249
+ void negate_nodes( Node*, Node* );
250
+ bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
251
+ void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
252
+ void freely_insert_at_node( Node *node, Label l );
253
+ int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
254
+ bool infinitely_ambiguous_node( Node* );
255
+ bool is_cyclic_node( Node*, NodeHashSet &visited );
256
+ bool is_automaton_node( Node* );
257
+ void store_symbols( Node*, SymbolMap&, LabelSet& );
258
+
259
+ void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
260
+ void splice_arc( Node*, Node*, Node*, Transducer* );
261
+ void enumerate_paths_node( Node*, vector<Label>&, NodeHashSet&,
262
+ vector<Transducer*>& );
263
+ void replace_char2( Node*, Node*, Character, Character, Transducer* );
264
+ Node *create_node( vector<Node*>&, char*, size_t line );
265
+ void read_transducer_binary( FILE* );
266
+ void read_transducer_text( FILE* );
267
+
268
+ void build_TT( Node *node, vector<Transition> &transtab );
269
+ size_t size_node( Node *node );
270
+
271
+ void index_nodes( Node*, vector<Node*>* );
272
+
273
+ public:
274
+ VType vmark; // current traversal generation (see incr_vmark)
275
+ bool deterministic;
276
+ bool minimised; // set to true on the result of minimise(); minimise() then only returns a copy
277
+ bool indexed;
278
+
279
+ Alphabet alphabet; // The set of all labels, i.e. character pairs
280
+ // default constructor; with empty=true the transducer is flagged as deterministic and minimised
281
+ Transducer( bool empty=false ) : root(), mem() {
282
+ vmark = 0;
283
+ deterministic = minimised = empty;
284
+ indexed = false;
285
+ node_count = transition_count = 0;
286
+ };
287
+
288
+ Transducer( Transducer&, vector<size_t>&, size_t );
289
+
290
+ // conversion of a string to a transducer
291
+ Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
292
+ // reads a word list from a file and stores it in the transducer
293
+ Transducer( istream&, const Alphabet *a=NULL, bool verbose=false,
294
+ bool lexcomments=false );
295
+ // reads a transducer from a binary or text file
296
+ Transducer( FILE*, bool binary=true );
297
+ // turns a sequence of labels into a transducer
298
+ Transducer( vector<Label>& );
299
+
300
+ // HFST additions...
301
+ Transducer &expand( set<char*> &s );
302
+ Node *expand_nodes( Node *node, Transducer *a, set<char*> &s );
303
+ void expand_node( Node *origin, Label &l, Node *target, Transducer *a, set<char*> &s );
304
+ void copy_nodes( Node *search_node, Transducer *copy_tr,
305
+ Node *start_node,
306
+ map<int, Node*> &mapper );
307
+ Transducer &remove_epsilons();
308
+ // ...HFST additions end
309
+
310
+ Node *root_node( void ) { return &root; }; // returns the root node
311
+ const Node *root_node( void ) const { return &root; }; // returns the root node
312
+ Node *new_node( void ); // memory allocation for a new node
313
+ Arc *new_arc( Label l, Node *target ); // memory allocation for a new arc
314
+ void add_string( char *s, bool extended=false, Alphabet *a=NULL );
315
+ void complete_alphabet( void );
316
+ void minimise_alphabet( void );
317
+ std::pair<size_t,size_t> nodeindexing( vector<Node*> *nodearray=NULL ); // the Minimiser reads the result as (node count, transition count)
318
+
319
+ int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
320
+
321
+ bool analyze_string( char *s, FILE *file, bool with_brackets=true );
322
+ bool generate_string( char *s, FILE *file, bool with_brackets=true );
323
+ void generate( FILE *file, int max=-1, OutputType ot=Joint );
324
+
325
+ void clear( void ); // clears the transducer. The resulting transducer
326
+ // is like one created with Transducer()
327
+ // copy duplicates a transducer
328
+ // if called with a non-zero first argument, upper and lower level are switched
329
+ // if called with an alphabet as second argument, the label encoding
330
+ // of the second argument is transferred to the transducer copy
331
+ Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
332
+ Transducer &switch_levels( void ) { return copy( true ); };
333
+ Transducer &splice( Label l, Transducer *a);
334
+ Transducer &freely_insert( Label l );
335
+ Transducer &replace_char( Character c, Character nc );
336
+ Transducer &level( Level );
337
+ Transducer &lower_level( void ) // creates a transducer for the "lower" language
338
+ { return level(lower); };
339
+ Transducer &upper_level( void ) // creates a transducer for the "upper" language
340
+ { return level(upper); };
341
+ Transducer &determinise( bool copy_alphabet=true ); // creates a deterministic transducer
342
+ Transducer &minimise( bool verbose=true ); // returns a minimised transducer (see hopcroft.C)
343
+ void store( FILE* ); // stores the transducer in binary format
344
+ void store_lowmem( FILE* );
345
+ void read( FILE* ); // reads a transducer in binary format
346
+ bool enumerate_paths( vector<Transducer*>& );
347
+
348
+ size_t size();
349
+
350
+ void build_transtab( vector<Transition> &transtab );
351
+
352
+ Transducer &reverse( bool copy_alphabet=true ); // reverse language
353
+ Transducer &operator|( Transducer& ); // union, disjunction
354
+ Transducer &operator+( Transducer& ); // concatenation
355
+ Transducer &operator/( Transducer& ); // subtraction
356
+ Transducer &operator&( Transducer& ); // intersection, conjunction
357
+ Transducer &operator||( Transducer& ); // composition
358
+ Transducer &operator!( void ); // complement, negation
359
+ Transducer &kleene_star( void );
360
+ bool operator==( Transducer& ); // minimises its arguments first
361
+
362
+ bool is_cyclic( void );
363
+ bool is_automaton( void );
364
+ bool is_infinitely_ambiguous( void );
365
+ bool is_empty( void ); // For efficiency reasons, these functions
366
+ bool generates_empty_string( void );// are better called after minimisation
367
+
368
+ friend class EdgeCount;
369
+ friend class MakeCompactTransducer;
370
+ friend class Minimiser;
371
+ friend ostream &operator<<(ostream&, Transducer&);
372
+ };
373
+ }
374
+ #endif
@@ -0,0 +1,681 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE hopcroft.C */
5
+ /* MODULE hopcroft */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /*******************************************************************/
10
+
11
+ #include "fst.h"
12
+
13
+ // HFST
14
+ namespace SFST
15
+ {
16
+
17
+
18
+ /*******************************************************************/
19
+ /* */
20
+ /* Transducer::rev_det_minimise */
21
+ /* */
22
+ /*******************************************************************/
23
+
24
+ #if 0
25
+ // alternative less efficient minimisation algorithm
26
+ Transducer &Transducer::rev_det_minimise( bool verbose )
27
+ // Compiled out by the surrounding #if 0. Minimises by reverse, determinise, reverse, determinise; intermediate transducers are deleted. "verbose" is not used.
28
+ {
29
+ if (minimised)
30
+ return copy();
31
+
32
+ Transducer *a1, *a2;
33
+
34
+ a1 = &reverse();
35
+ a2 = &a1->determinise();
36
+ delete a1;
37
+
38
+ a1 = &a2->reverse();
39
+ delete a2;
40
+
41
+ a2 = &a1->determinise();
42
+ delete a1;
43
+
44
+ a2->minimised = true;
45
+ a2->minimise_alphabet();
46
+
47
+ return *a2;
48
+ }
49
+ #endif
50
+
51
+
52
+ /***************** class Minimiser *****************************/
53
+
54
+ class Minimiser {
55
+ // Helper used by Transducer::minimise: the states are partitioned into groups which are refined with the help of an agenda.
56
+ // States, transitions and groups are stored in vectors and refer to each other by Index; undef marks "none".
57
+ /***************** class Transition **************************/
58
+
59
+ class Transition {
60
+ // one transition of the input transducer, stored in the list of incoming transitions of its target state
61
+ public:
62
+ Index source; // index of the source state
63
+ Index next_for_target; // next transition with the same target state (undef = end of list)
64
+ Index next_for_label; // next transition in the per-label list built by compute_source_states()
65
+ Label label;
66
+
67
+ Transition( Index s, Label l, Index n ) { // s: source state, l: label, n: next transition into the same target
68
+ source = s;
69
+ label = l;
70
+ next_for_target = n;
71
+ next_for_label = undef;
72
+ }
73
+ };
74
+
75
+
76
+ /***************** class State *******************************/
77
+
78
+ class State {
79
+ // the states of a group form a circular doubly linked list (see link_state_in / link_state_out)
80
+ public:
81
+ Index group; // index of group to which this state belongs
82
+ Index next_in_group; // index of next state in group
83
+ Index previous_in_group; // index of previous state in group
84
+ Index first_transition; // index of first transition with this
85
+ // state as target
86
+
87
+ State() {
88
+ group = next_in_group = previous_in_group = undef;
89
+ first_transition = undef;
90
+ }
91
+ };
92
+
93
+
94
+ /***************** class StateGroup **************************/
95
+
96
+ class StateGroup {
97
+ // a set of states; during refinement the states moved by move_state_to_new() are kept in a second ("new") list
98
+ public:
99
+ Index next; // index of next source group
100
+ Index next_in_agenda;
101
+ Index previous_in_agenda;
102
+
103
+ Index size; // number of states in this group
104
+ Index first_state; // pointer to first state
105
+
106
+ Index new_size;
107
+ Index first_new_state; // pointer to the set of intersection states
108
+
109
+ void init( Index i ) { // i is the group's own index: next_in_agenda == i means "not on the agenda"; previous_in_agenda is not set here
110
+ next_in_agenda = i;
111
+ size = new_size = 0;
112
+ next = first_state = first_new_state = undef;
113
+ }
114
+ bool is_empty() {
115
+ return first_state == undef;
116
+ }
117
+ };
118
+
119
+
120
+ /***************** class Agenda *****************************/
121
+
122
+ class Agenda {
123
+ // Agenda of groups still to be processed; each bucket is a circular doubly linked list whose head is a dummy group
124
+ static const Index bucket_count = (Index)(sizeof(Index) * 8); // number of bits in Index
125
+ // the first "bucket_count" many groups are dummy groups
126
+ // used as the agenda buckets
127
+
128
+ vector<StateGroup> &group; // reference to the Minimiser's group vector
129
+
130
+ public:
131
+
132
+ Agenda( vector<StateGroup> &g ) : group(g) {
133
+ // allocate some dummy groups for the agenda
134
+ g.resize(bucket_count);
135
+ for( Index i=0; i<bucket_count; i++ )
136
+ group[i].next_in_agenda = group[i].previous_in_agenda = i;
137
+ }
138
+
139
+ Index pop() { // removes and returns a group from the lowest non-empty bucket, or undef if all buckets are empty
140
+ for( Index i=0; i<bucket_count; i++ ) {
141
+ if (group[i].next_in_agenda != i) {
142
+ Index result = group[i].next_in_agenda;
143
+ erase( result );
144
+ return result;
145
+ }
146
+ }
147
+ return undef;
148
+ }
149
+
150
+ void add( Index g, Index size ) { // puts group g on the agenda; "size" selects the bucket
151
+
152
+ // find the bucket
153
+ Index i;
154
+ for( i=0; (size >>= 1); i++ ) ; // i = floor(log2(size)) for size >= 1 (0 for size 0)
155
+
156
+ // insert the new group
157
+ Index next = group[i].next_in_agenda;
158
+ group[i].next_in_agenda = g;
159
+ group[g].next_in_agenda = next;
160
+ group[g].previous_in_agenda = i;
161
+ group[next].previous_in_agenda = g;
162
+ }
163
+
164
+ void erase( Index g ) {
165
+ // update the pointers
166
+ Index next = group[g].next_in_agenda;
167
+ Index previous = group[g].previous_in_agenda;
168
+ group[previous].next_in_agenda = next;
169
+ group[next].previous_in_agenda = previous;
170
+
171
+ // unlink the result element
172
+ group[g].previous_in_agenda = group[g].next_in_agenda = g;
173
+ }
174
+
175
+ bool contains( Index g ) { // a group that is not on the agenda links to itself
176
+ return (group[g].next_in_agenda != g);
177
+ }
178
+
179
+ Index number_of_buckets() { return bucket_count; }
180
+ };
181
+
182
+
183
+ /***************************************************************/
184
+
185
+
186
+ Transducer &transducer; // reference to the original transducer
187
+ size_t number_of_nodes; // node count in original t.
188
+ size_t number_of_transitions; // transition count in original t.
189
+ vector<Node*> nodearray; // maps indices to original transducer nodes
190
+
191
+ // CAVEAT: Do not use references to elements of the group vector
192
+ // because they become invalid when the group vector is resized.
193
+ vector<StateGroup> group;
194
+ vector<State> state;
195
+ vector<Transition> transition;
196
+ Agenda agenda; // must stay declared after "group": its constructor resizes that vector
197
+
198
+ // data structure for the sets of incoming transitions
199
+ typedef map<Label,Index> Label2TransSet;
200
+
201
+ // "first_transition_for_label" maps a label to a list of transitions
202
+ // to (states in) C that are labelled with the respective label
203
+ Label2TransSet first_transition_for_label;
204
+
205
+ Index first_source_group; // linked list of source groups
206
+
207
+ public:
208
+ Minimiser( Transducer &t );
209
+ Transducer &result();
210
+
211
+ private:
212
+ // transform the transducer to the representation needed for minimisation
213
+ void add_transition( Index s, Label l, Index t );
214
+ void link_state_in( Index &first_state, Index s );
215
+ void add_state( Index g, Index s );
216
+ void link_state_out( Index &first_state, Index s );
217
+ void remove_state( Index g, Index s );
218
+ void move_state_to_new( Index g, Index s );
219
+ void merge_state_lists( Index g );
220
+
221
+ void compute_source_states( Index g );
222
+ void process_source_groups( Label l );
223
+ void split( Index g, Label l );
224
+
225
+ Index first_group() { return agenda.number_of_buckets(); } // index of the first real group; the entries before it are the agenda's dummy groups
226
+
227
+ Transducer &build_transducer();
228
+
229
+ #if 0
230
+ void print_groups() {
231
+ fputs("--------------\n", stderr);
232
+ for( size_t g=first_group(); g<group.size(); g++ ) {
233
+ fprintf(stderr,"group %lu: ", (unsigned long)g-first_group());
234
+ if (group[g].first_state != undef) {
235
+ Index s = group[g].first_state;
236
+ do {
237
+ fprintf(stderr,"%lu ", (unsigned long)s);
238
+ s = state[s].next_in_group;
239
+ } while (s != group[g].first_state);
240
+ }
241
+ if (group[g].first_new_state != undef) {
242
+ fputs("| ", stderr);
243
+ Index s = group[g].first_new_state;
244
+ do {
245
+ fprintf(stderr,"%lu ", (unsigned long)s);
246
+ s = state[s].next_in_group;
247
+ } while (s != group[g].first_new_state);
248
+ }
249
+ fputc('\n', stderr);
250
+ }
251
+ }
252
+ #endif
253
+ };
254
+
255
+
256
+ /*******************************************************************/
257
+ /* */
258
+ /* Transducer::minimise */
259
+ /* */
260
+ /*******************************************************************/
261
+
262
+ Transducer &Transducer::minimise( bool verbose )
262
+ // Returns a new minimised transducer (a plain copy if this one is already flagged as minimised). "verbose" is not used in this function.
263
+ {
264
+ if (minimised)
265
+ return copy();
266
+
267
+ Transducer *a1 = &reverse( false ); // NOTE(review): the double reversal presumably removes useless states - confirm against reverse()
268
+ Transducer *a2 = &a1->reverse( false );
269
+ delete a1;
270
+ a1 = &a2->determinise( false );
271
+ delete a2;
272
+
273
+ Transducer *result = &Minimiser( *a1 ).result(); // run the Minimiser on the deterministic transducer
274
+ delete a1;
275
+
276
+ result->minimised = true;
277
+ result->alphabet.copy(alphabet); // the intermediate steps were called with copy_alphabet=false, so the alphabet is copied here
278
+ result->minimise_alphabet();
279
+
280
+ return *result;
281
+ }
283
+
284
+
285
+ /*******************************************************************/
286
+ /* */
287
+ /* Minimiser::Minimiser */
288
+ /* */
289
+ /*******************************************************************/
290
+
291
+ Minimiser::Minimiser( Transducer &t )
292
+ : transducer(t), agenda(group)
293
+ // Indexes the nodes of t, creates the two initial groups (final / non-final) and records every arc as an incoming transition of its target state.
294
+ {
295
+ std::pair<size_t, size_t> NC_TC = t.nodeindexing( &nodearray );
296
+ number_of_nodes = NC_TC.first;
297
+ number_of_transitions = NC_TC.second;
298
+
299
+ state.resize(number_of_nodes);
300
+ transition.reserve(number_of_transitions);
301
+
302
+ group.reserve(number_of_nodes+first_group());
303
+
304
+ // one group each for the final and the non-final states
305
+ Index final = (Index)group.size(); // equals first_group(): the agenda constructor has already created the dummy groups
306
+ group.push_back( StateGroup() );
307
+ group.back().init( final );
308
+
309
+ Index nonfinal = (Index)group.size();
310
+ group.push_back( StateGroup() );
311
+ group.back().init(nonfinal);
312
+
313
+ // build the transition table
314
+ for( Index sourceID=0; sourceID<(Index)nodearray.size(); sourceID++ ) {
315
+ Node *node = nodearray[sourceID];
316
+
317
+ if (node->is_final())
318
+ add_state( final, sourceID );
319
+ else
320
+ add_state( nonfinal, sourceID );
321
+
322
+ for( ArcsIter p(node->arcs()); p; p++ ) {
323
+ Arc *arc=p;
324
+ add_transition( sourceID, arc->label(), arc->target_node()->index ); // the target node's index is used as its state number
325
+ }
326
+ }
327
+ }
328
+
329
+
330
+ /*******************************************************************/
331
+ /* */
332
+ /* Minimiser::link_state_in */
333
+ /* */
334
+ /*******************************************************************/
335
+
336
+ void Minimiser::link_state_in( Index &first_state, Index s )
337
+ // Inserts state s into the circular doubly linked list headed by first_state; first_state is only changed if the list was empty.
338
+ {
339
+ if (first_state == undef) {
340
+ first_state = s; // empty list: s becomes the only element and links to itself
341
+ state[s].next_in_group = state[s].previous_in_group = s;
342
+ }
343
+ else {
344
+ Index n = state[first_state].next_in_group; // insert s between the head and the head's successor
345
+ state[first_state].next_in_group = s;
346
+ state[s].next_in_group = n;
347
+ state[n].previous_in_group = s;
348
+ state[s].previous_in_group = first_state;
349
+ }
350
+ }
351
+
352
+
353
+ /*******************************************************************/
354
+ /* */
355
+ /* Minimiser::add_state */
356
+ /* */
357
+ /*******************************************************************/
358
+
359
+ void Minimiser::add_state( Index g, Index s )
360
+ // Assigns state s to group g: increments the group's size, records the group in the state and links s into the group's state list.
361
+ {
362
+ group[g].size++;
363
+ state[s].group = g;
364
+ link_state_in( group[g].first_state, s );
365
+ }
366
+
367
+
368
+ /*******************************************************************/
369
+ /* */
370
+ /* Minimiser::link_state_out */
371
+ /* */
372
+ /*******************************************************************/
373
+
374
+ void Minimiser::link_state_out( Index &first_state, Index s )
375
+ // Unlinks state s from the circular list headed by first_state. The links stored in s itself are left unchanged.
376
+ {
377
+ State &S = state[s];
378
+ // only state in group ?
379
+ if (S.next_in_group == s)
380
+ first_state = undef; // the list becomes empty
381
+ else {
382
+ Index p = S.previous_in_group;
383
+ Index n = S.next_in_group;
384
+ state[p].next_in_group = n;
385
+ state[n].previous_in_group = p;
386
+ if (first_state == s) // s was the head: its successor becomes the new head
387
+ first_state = n;
388
+ }
389
+ }
390
+
391
+
392
+ /*******************************************************************/
393
+ /* */
394
+ /* Minimiser::remove_state */
395
+ /* */
396
+ /*******************************************************************/
397
+
398
+ void Minimiser::remove_state( Index g, Index s )
399
+ // Removes state s from the regular state list of group g and decrements the group's size; state[s].group is not changed.
400
+ {
401
+ group[g].size--;
402
+ link_state_out( group[g].first_state, s );
403
+ }
404
+
405
+
406
+ /*******************************************************************/
407
+ /* */
408
+ /* Minimiser::move_state_to_new */
409
+ /* */
410
+ /*******************************************************************/
411
+
412
+ void Minimiser::move_state_to_new( Index g, Index s )
413
+ // Moves state s from the regular state list of group g to the group's "new" list and adjusts both counters.
414
+ {
415
+ group[g].size--;
416
+ group[g].new_size++;
417
+
418
+ link_state_out( group[g].first_state, s );
419
+ link_state_in( group[g].first_new_state, s );
420
+ }
421
+
422
+
423
+ /*******************************************************************/
424
+ /* */
425
+ /* Minimiser::merge_state_lists */
426
+ /* */
427
+ /*******************************************************************/
428
+
429
+ void Minimiser::merge_state_lists( Index g )
430
+ // Puts the states of the "new" list of group g back into the regular list and resets the "new" list and its counter.
431
+ {
432
+ Index first1 = group[g].first_state;
433
+ if (first1 == undef)
434
+ group[g].first_state = group[g].first_new_state; // regular list is empty: the new list simply takes its place
435
+ else {
436
+ Index first2 = group[g].first_new_state; // NOTE(review): state[first2] below assumes the new list is not empty
437
+ Index next1 = state[first1].next_in_group;
438
+ Index next2 = state[first2].next_in_group;
439
+ state[first1].next_in_group = next2; // splice the two circular lists into one by exchanging the successors of both heads
440
+ state[first2].next_in_group = next1;
441
+ state[next1].previous_in_group = first2;
442
+ state[next2].previous_in_group = first1;
443
+ }
444
+ group[g].first_new_state = undef;
445
+ group[g].size += group[g].new_size;
446
+ group[g].new_size = 0;
447
+ }
448
+
449
+
450
+ /*******************************************************************/
451
+ /* */
452
+ /* Minimiser::add_transition */
453
+ /* */
454
+ /*******************************************************************/
455
+
456
+ void Minimiser::add_transition( Index s, Label l, Index t )
457
+ // Stores a transition from state s to state t with label l and puts it at the front of t's list of incoming transitions.
458
+ {
459
+ Transition T( s, l, state[t].first_transition ); // the previous list head becomes the successor of the new transition
460
+ state[t].first_transition = (Index)transition.size(); // index the new transition gets in the push_back below
461
+ transition.push_back(T);
462
+ }
463
+
464
+
465
+ /*******************************************************************/
466
+ /* */
467
+ /* Minimiser::result */
468
+ /* */
469
+ /*******************************************************************/
470
+
471
+ Transducer &Minimiser::result()
472
+ // Runs the group refinement and returns the resulting transducer by reference.
473
+ {
474
+ if (number_of_nodes == 1)
475
+ return transducer.copy(); // no need for a minimisation
476
+
477
+ Index final = first_group(); // the constructor created the group of final states first, the non-final group second
478
+ Index nonfinal = final + 1;
479
+ if (group[final].is_empty())
480
+ // no final states
481
+ return *new Transducer( true ); // return an empty transducer
482
+
483
+ if (group[nonfinal].is_empty()) {
484
+ // no non-final states
485
+ group.pop_back();
486
+ agenda.add(final, group[final].size);
487
+ }
488
+ else {
489
+ agenda.add(final, group[final].size);
490
+ agenda.add(nonfinal, group[nonfinal].size);
491
+ }
492
+
493
+ Index g;
494
+ while ((g = agenda.pop()) != undef) {
495
+
496
+ compute_source_states( g );
497
+
498
+ // for all labels appearing on incoming transitions
499
+ for( Label2TransSet::iterator it=first_transition_for_label.begin();
500
+ it!=first_transition_for_label.end(); it++ )
501
+ {
502
+ process_source_groups( it->first );
503
+ }
504
+ if (group.size() - first_group() == number_of_nodes) // as many groups as nodes: nothing left to split
505
+ break;
506
+ }
507
+ Transducer &t = build_transducer();
508
+
509
+ return t;
510
+ }
511
+
512
+
513
+ /*******************************************************************/
514
+ /* */
515
+ /* Minimiser::compute_source_states */
516
+ /* */
517
+ /*******************************************************************/
518
+
519
+ void Minimiser::compute_source_states( Index g )
520
+ // Rebuilds first_transition_for_label: for each label, the list of transitions leading into a state of group g (called C in the comments).
521
+ {
522
+ first_transition_for_label.clear();
523
+
524
+ // for all states S in C
525
+ Index first = group[g].first_state; // NOTE(review): the do-loop below assumes that group g is not empty
526
+ Index s = first;
527
+ do {
528
+ State &S = state[s];
529
+ // for all transitions T into S
530
+ for( Index t=S.first_transition; t!=undef;
531
+ t=transition[t].next_for_target )
532
+ {
533
+ Transition &T = transition[t];
534
+ T.next_for_label = undef; // discard the link left over from an earlier call
535
+ // add the transition to the list of
536
+ // incoming transitions with the same label
537
+ Label2TransSet::iterator it=first_transition_for_label.find(T.label);
538
+ if (it == first_transition_for_label.end())
539
+ // add a new mapping
540
+ first_transition_for_label[T.label] = t;
541
+ else {
542
+ // prepend the new element to the list
543
+ T.next_for_label = it->second;
544
+ it->second = t;
545
+ }
546
+ }
547
+ s = S.next_in_group;
548
+ }
549
+ while (s != first); // the state list is circular: stop when the first state is reached again
550
+ }
551
+
552
+
553
+ /*******************************************************************/
554
+ /* */
555
+ /* Minimiser::process_source_groups */
556
+ /* */
557
+ /*******************************************************************/
558
+
559
+ void Minimiser::process_source_groups( Label l )
560
+ // Moves the source states of all transitions listed for label l to the "new" list of their group, then splits or re-merges each affected group.
561
+ {
562
+ first_source_group = undef;
563
+
564
+ // for all incoming transitions with label l
565
+ for( Index t = first_transition_for_label[l]; t != undef;
566
+ t = transition[t].next_for_label )
567
+ {
568
+ // get the transition, source state, and source state group
569
+ Transition &T = transition[t];
570
+ State &S = state[T.source];
571
+ Index g = S.group;
572
+
573
+ // If new, add this group to the list of source groups
574
+ if (group[g].first_new_state == undef) { // an empty "new" list means the group has not been seen in this call
575
+ group[g].next = first_source_group;
576
+ first_source_group = S.group;
577
+ }
578
+
579
+ move_state_to_new(g, T.source ); // NOTE(review): assumes each source state occurs only once per label (deterministic input) - confirm
580
+ }
581
+
582
+ // for all source groups
583
+ for( Index g = first_source_group; g != undef; g = group[g].next ) {
584
+ if (group[g].size > 0) // some states were not moved: the moved states form a new group
585
+ split( g, l );
586
+ else // all states were moved: restore the group unchanged
587
+ merge_state_lists( g );
588
+ }
589
+ return;
590
+ }
591
+
592
+
593
+
594
+ /*******************************************************************/
595
+ /* */
596
+ /* Minimiser::split */
597
+ /* */
598
+ /*******************************************************************/
599
+
600
+ void Minimiser::split( Index g, Label l )
601
+ // Turns the "new" state list of group g into a separate group and updates the agenda. The parameter l is not used in this function.
602
+ {
603
+ // create a new group
604
+ Index newg = (Index)group.size();
605
+ group.push_back( StateGroup() );
606
+ StateGroup &NewG = group.back(); // taken after the push_back; the group vector is not resized again in this function
607
+ NewG.init( newg );
608
+ NewG.first_state = group[g].first_new_state;
609
+ NewG.size = group[g].new_size;
610
+ group[g].first_new_state = undef;
611
+ group[g].new_size = 0;
612
+ Index s = NewG.first_state;
613
+ // assign all states of the new group to it
614
+ do {
615
+ state[s].group = newg;
616
+ s = state[s].next_in_group;
617
+ }
618
+ while (s != NewG.first_state);
619
+
620
+ // update the agenda
621
+
622
+ if (agenda.contains( g )) {
623
+ // G was on the agenda
624
+ agenda.erase(g); // re-insert g because its size (and therefore possibly its bucket) has changed
625
+ agenda.add(g, group[g].size);
626
+ agenda.add(newg, group[newg].size);
627
+ }
628
+ // Otherwise, put the smaller subgroup on the agenda
629
+ else if (group[g].size < group[newg].size)
630
+ agenda.add(g, group[g].size);
631
+ else
632
+ agenda.add(newg, group[newg].size);
633
+
634
+ return;
635
+ }
636
+
637
+
638
+ /*******************************************************************/
639
+ /* */
640
+ /* Minimiser::build_transducer */
641
+ /* */
642
+ /*******************************************************************/
643
+
644
+ Transducer &Minimiser::build_transducer()
645
+ // Builds a new transducer (allocated with new, returned by reference) with one node per state group.
646
+ {
647
+ Transducer *t = new Transducer( true );
648
+ t->alphabet.copy(transducer.alphabet);
649
+
650
+
651
+ // create the nodes of the new transducer
652
+ vector<Node*> node(group.size(), NULL); // indexed by group; the entries of the agenda's dummy groups stay NULL
653
+
654
+ // define the root node
655
+ node[state[0].group] = t->root_node(); // NOTE(review): treats state 0 as the original root - presumably nodeindexing() numbers the root first; confirm
656
+
657
+ for( size_t i=first_group(); i<node.size(); i++ )
658
+ if (node[i] == NULL)
659
+ node[i] = t->new_node();
660
+
661
+ // Add the transitions
662
+ for( size_t g=first_group(); g<group.size(); g++ ) {
663
+ Node *old_node = nodearray[group[g].first_state]; // the first state of the group serves as its representative
664
+ Node *new_node = node[g];
665
+ new_node->set_final( old_node->is_final() );
666
+
667
+ for( ArcsIter p(old_node->arcs()); p; p++ ) {
668
+ Arc *arc=p;
669
+ // Compute the ID of the target state
670
+ Index ts = (Index)arc->target_node()->index;
671
+ // Get the node for the corresponding state group
672
+ Node *target = node[state[ts].group];
673
+ // Insert the transition
674
+ new_node->add_arc( arc->label(), target, t );
675
+ }
676
+ }
677
+
678
+ return *t;
679
+ }
680
+
681
+ }