ruby-sfst 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,365 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE fst.h */
4
+ /* MODULE fst */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE finite state tools */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #ifndef _FST_H_
13
+ #define _FST_H_
14
+
15
+ #include "alphabet.h"
16
+
17
+
18
+ /*******************************************************************/
19
+ /* include commands */
20
+ /*******************************************************************/
21
+
22
+ #include <string>
23
+
24
+ #include <vector>
25
+
26
+ #include "mem.h"
27
+
28
// Type of the "visited" generation marks (see Node::visited and
// Transducer::vmark, which are both declared with this type).
typedef unsigned short VType;

// Defined in another translation unit.
// NOTE(review): presumably a flag that suppresses progress output - confirm.
extern int Quiet;

// Forward declarations of the classes defined further down in this header.
class Transducer;
class Node;
class Arcs;
class Arc;
36
+
37
+
38
+ struct hashf {
39
+ size_t operator()(const Node *n) const { return (size_t) n; }
40
+ };
41
+ struct equalf {
42
+ int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
43
+ };
44
+ typedef __gnu_cxx::hash_set<Node*, hashf, equalf> NodeHashSet;
45
+
46
+
47
+
48
+ /***************** class Arc *************************************/
49
+
50
+ class Arc {
51
+
52
+ private:
53
+ Label l;
54
+ Node *target;
55
+ Arc *next;
56
+
57
+ public:
58
+ void init( Label ll, Node *node ) { l=ll; target=node; };
59
+ Label label( void ) const { return l; };
60
+ Node *target_node( void ) { return target; };
61
+ const Node *target_node( void ) const { return target; };
62
+
63
+ friend class Arcs;
64
+ friend class ArcsIter;
65
+ };
66
+
67
+
68
+ /***************** class Arcs ************************************/
69
+
70
+ class Arcs {
71
+
72
+ private:
73
+ Arc *first_arcp;
74
+ Arc *first_epsilon_arcp;
75
+
76
+ public:
77
+ void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
78
+ Arcs( void ) { init(); };
79
+ Node *target_node( Label l );
80
+ const Node *target_node( Label l ) const;
81
+ void add_arc( Label, Node*, Transducer* );
82
+ int remove_arc( Arc* );
83
+ bool is_empty( void ) const { return !(first_arcp || first_epsilon_arcp); };
84
+ bool epsilon_transition_exists( void ) const { return first_epsilon_arcp != NULL; };
85
+ bool non_epsilon_transition_exists( void ) const { return first_arcp != NULL; };
86
+ int size( void ) const;
87
+
88
+ friend class ArcsIter;
89
+ };
90
+
91
+
92
+ /***************** class ArcsIter ********************************/
93
+
94
+ class ArcsIter {
95
+
96
+ // ArcsIter iterates over the arcs starting with epsilon arcs
97
+
98
+ private:
99
+ Arc *current_arcp;
100
+ Arc *more_arcs;
101
+
102
+ public:
103
+ typedef enum {all,non_eps,eps} IterType;
104
+
105
+ ArcsIter( const Arcs *arcs, IterType type=all ) {
106
+ more_arcs = NULL;
107
+ if (type == all) {
108
+ if (arcs->first_epsilon_arcp) {
109
+ current_arcp = arcs->first_epsilon_arcp;
110
+ more_arcs = arcs->first_arcp;
111
+ }
112
+ else
113
+ current_arcp = arcs->first_arcp;
114
+ }
115
+ else if (type == non_eps)
116
+ current_arcp = arcs->first_arcp;
117
+ else
118
+ current_arcp = arcs->first_epsilon_arcp;
119
+ };
120
+
121
+ void operator++( int ) {
122
+ if (current_arcp) {
123
+ current_arcp = current_arcp->next;
124
+ if (!current_arcp && more_arcs) {
125
+ current_arcp = more_arcs;
126
+ more_arcs = NULL;
127
+ }
128
+ }
129
+ };
130
+ operator Arc*( void ) { return current_arcp; };
131
+
132
+ };
133
+
134
+
135
+ /***************** class Node ************************************/
136
+
137
+ class Node {
138
+
139
+ private:
140
+ bool final;
141
+ VType visited;
142
+ Arcs arcsp;
143
+ Node *forwardp;
144
+
145
+ public:
146
+ Node( void ) { init(); };
147
+ void init( void );
148
+ bool is_final( void ) const { return final; };
149
+ void set_final( bool flag ) { final = flag; };
150
+ void set_forward( Node *node ) { forwardp = node; };
151
+ const Node *target_node( Label l ) const { return arcs()->target_node(l); };
152
+ Node *target_node( Label l ) { return arcs()->target_node(l); };
153
+ void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
154
+ Arcs *arcs( void ) { return &arcsp; };
155
+ const Arcs *arcs( void ) const { return &arcsp; };
156
+ Node *forward( void ) { return forwardp; };
157
+ bool was_visited( VType vmark ) {
158
+ if (visited == vmark)
159
+ return true;
160
+ visited = vmark;
161
+ return false;
162
+ };
163
+ bool check_visited( VType vm ) // leaves the visited flag unchanged
164
+ { return (visited==vm); };
165
+ };
166
+
167
+
168
+ /***************** class Node2Int *********************************/
169
+
170
+ class Node2Int {
171
+
172
+ struct hashf {
173
+ size_t operator()(const Node *node) const {
174
+ return (size_t)node;
175
+ }
176
+ };
177
+ struct equalf {
178
+ int operator()(const Node *n1, const Node *n2) const {
179
+ return (n1 == n2);
180
+ }
181
+ };
182
+ typedef __gnu_cxx::hash_map<Node*, int, hashf, equalf> NL;
183
+
184
+ private:
185
+ int current_number;
186
+ NL number;
187
+
188
+ public:
189
+ int &operator[]( Node *node ) {
190
+ NL::iterator it=number.find(node);
191
+ if (it == number.end())
192
+ return number.insert(NL::value_type(node, 0)).first->second;
193
+ return it->second;
194
+ };
195
+ };
196
+
197
+
198
+ /***************** class NodeNumbering ****************************/
199
+
200
+ class NodeNumbering {
201
+
202
+ private:
203
+ std::vector<Node*> nodes;
204
+ Node2Int nummap;
205
+ void number_node( Node*, Transducer& );
206
+
207
+ public:
208
+ NodeNumbering( Transducer& );
209
+ int operator[]( Node *node ) { return nummap[node]; };
210
+ size_t number_of_nodes( void ) { return nodes.size(); };
211
+ Node *get_node( size_t n ) { return nodes[n]; };
212
+ };
213
+
214
+
215
+ /***************** class PairMapping ****************************/
216
+
217
+ class PairMapping {
218
+ // This class is used to map a node pair from two transducers
219
+ // to a single node in another transducer
220
+
221
+ typedef std::pair<Node*, Node*> NodePair;
222
+
223
+ private:
224
+ struct hashf {
225
+ size_t operator()(const NodePair p) const {
226
+ return (size_t)p.first ^ (size_t)p.second;
227
+ }
228
+ };
229
+ struct equalf {
230
+ int operator()(const NodePair p1, const NodePair p2) const {
231
+ return (p1.first==p2.first && p1.second == p2.second);
232
+ }
233
+ };
234
+ typedef __gnu_cxx::hash_map<NodePair, Node*, hashf, equalf> PairMap;
235
+ PairMap pm;
236
+
237
+ public:
238
+ typedef PairMap::iterator iterator;
239
+ iterator begin( void ) { return pm.begin(); };
240
+ iterator end( void ) { return pm.end(); };
241
+ iterator find( Node *n1, Node *n2 )
242
+ { return pm.find( NodePair(n1,n2) ); };
243
+ Node* &operator[]( NodePair p ) { return pm.operator[](p); };
244
+
245
+ };
246
+
247
+
248
+ /***************** class Transducer *******************************/
249
+
250
+ class Transducer {
251
+
252
+ private:
253
+ bool deterministic;
254
+ bool minimised;
255
+ Node root;
256
+ Mem mem;
257
+
258
+ typedef std::set<Label, Label::label_cmp> LabelSet;
259
+ typedef __gnu_cxx::hash_map<Character, char*> SymbolMap;
260
+
261
+ void reverse_node( Node *old_node, Transducer *new_node );
262
+ Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
263
+ Node *copy_nodes( Node *n, Transducer *a,
264
+ bool lswitch=false, bool recode=false );
265
+ void rec_cat_nodes( Node*, Node* );
266
+ bool productive_node( Node* );
267
+ bool prune_nodes( Node* );
268
+ void negate_nodes( Node*, Node* );
269
+ bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
270
+ void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
271
+ void freely_insert_at_node( Node *node, Label l );
272
+ int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
273
+ bool infinitely_ambiguous_node( Node* );
274
+ bool is_cyclic_node( Node*, NodeHashSet &visited );
275
+ bool is_automaton_node( Node* );
276
+ bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
277
+ void store_symbols( Node*, SymbolMap&, LabelSet& );
278
+
279
+ void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
280
+ void splice_arc( Node*, Node*, Node*, Transducer* );
281
+ void enumerate_paths_node( Node*, std::vector<Label>&, NodeHashSet&,
282
+ std::vector<Transducer*>& );
283
+ void replace_char2( Node*, Node*, Character, Character, Transducer* );
284
+ Node *create_node( std::vector<Node*>&, char*, size_t line );
285
+ void read_transducer_binary( FILE* );
286
+ void read_transducer_text( FILE* );
287
+
288
+ public:
289
+ VType vmark;
290
+ void incr_vmark( void ) {
291
+ if (++vmark == 0)
292
+ throw "Overflow of generation counter!";
293
+ };
294
+ Alphabet alphabet; // The set of all labels, i.e. character pairs
295
+
296
+ Transducer( void ) : root(), mem()
297
+ { vmark = 0; deterministic = minimised = false; };
298
+ // convertion of a string to an transducer
299
+ Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
300
+ // reads a word list from a file and stores it in the transducer
301
+ Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
302
+ // reads a transducer from a binary or text file
303
+ Transducer( FILE*, bool binary=true );
304
+ // turns a sequence of labels into a transducer
305
+ Transducer( std::vector<Label>& );
306
+
307
+ Node *root_node( void ) { return &root; }; // returns the root node
308
+ const Node *root_node( void ) const { return &root; }; // returns the root node
309
+ Node *new_node( void ); // memory alocation for a new node
310
+ Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
311
+ void add_string( char *s, bool extended=false );
312
+ void complete_alphabet( void );
313
+ void minimise_alphabet( void );
314
+ void prune( void ); // remove unnecessary arcs
315
+
316
+ int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
317
+
318
+ bool analyze_string( char *s, FILE *file, bool with_brackets=true );
319
+ bool generate_string( char *s, FILE *file, bool with_brackets=true );
320
+ bool generate( FILE *file, bool separate=false );
321
+
322
+ void clear( void ); // clears the transducer. The resulting transducer
323
+ // is like one created with Transducer()
324
+ // copy duplicates an transducer
325
+ // if called with a non-zero argument, upper and lower level are switched
326
+ Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
327
+ Transducer &switch_levels( void ) { return copy( true ); };
328
+ Transducer &splice( Label l, Transducer *a);
329
+ Transducer &freely_insert( Label l );
330
+ Transducer &replace_char( Character c, Character nc );
331
+ Transducer &level( Level );
332
+ Transducer &lower_level( void ) // creates an transducer for the "lower" language
333
+ { return level(lower); };
334
+ Transducer &upper_level( void ) // creates an transducer for the "upper" language
335
+ { return level(upper); };
336
+ Transducer &determinise( void ); // creates a deterministic transducer
337
+ Transducer &minimise( bool verbose=true ); // creates a minimised transducer
338
+ void store( FILE* ); // stores the transducer in binary format
339
+ void store_lowmem( FILE* );
340
+ void read( FILE* ); // reads an transducer in binary format
341
+ bool enumerate_paths( std::vector<Transducer*>& );
342
+
343
+ Transducer &reverse( void ); // reverse language
344
+ Transducer &operator|( Transducer& ); // union, disjunction
345
+ Transducer &operator+( Transducer& ); // concatenation
346
+ Transducer &operator/( Transducer& ); // subtraction
347
+ Transducer &operator&( Transducer& ); // intersection, conjunction
348
+ Transducer &operator||( Transducer& ); // composition
349
+ Transducer &operator!( void ); // complement, negation
350
+ Transducer &kleene_star( void );
351
+ bool operator==( Transducer& ); // minimises its arguments first
352
+
353
+ bool is_cyclic( void );
354
+ bool is_automaton( void );
355
+ bool is_infinitely_ambiguous( void );
356
+ bool is_empty( void ); // For efficiency reasons, these functions
357
+ bool generates_empty_string( void );// are better called after minimisation
358
+
359
+ friend class NodeNumbering;
360
+ friend class EdgeCount;
361
+ friend class MakeCompactTransducer;
362
+ friend std::ostream &operator<<(std::ostream&, Transducer&);
363
+ };
364
+
365
+ #endif