ruby-sfst 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,365 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE fst.h */
4
+ /* MODULE fst */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /* PURPOSE finite state tools */
9
+ /* */
10
+ /*******************************************************************/
11
+
12
+ #ifndef _FST_H_
13
+ #define _FST_H_
14
+
15
+ #include "alphabet.h"
16
+
17
+
18
+ /*******************************************************************/
19
+ /* include commands */
20
+ /*******************************************************************/
21
+
22
+ #include <string>
23
+
24
+ #include <vector>
25
+
26
+ #include "mem.h"
27
+
28
// Type of the "visited" generation marks stored in Node::visited and
// Transducer::vmark.
typedef unsigned short VType;

// Declared here, defined in another translation unit.
// NOTE(review): presumably a switch that suppresses progress output -- confirm.
extern int Quiet;

// Forward declarations so that the classes below can refer to each other
// through pointers and references before they are defined.
class Node;
class Arc;
class Arcs;
class Transducer;
36
+
37
+
38
// Hash functor for node pointers: the address itself is the hash value.
struct hashf {
  size_t operator()(const class Node *n) const {
    return reinterpret_cast<size_t>(n);
  }
};

// Equality functor for node pointers: two keys are equal iff they are the
// same address. Returns 1 for equal and 0 otherwise.
struct equalf {
  int operator()(const class Node *n1, const class Node *n2) const {
    return (n1 == n2) ? 1 : 0;
  }
};

// Set of node pointers, hashed and compared by address.
typedef __gnu_cxx::hash_set<Node*, hashf, equalf> NodeHashSet;
45
+
46
+
47
+
48
+ /***************** class Arc *************************************/
49
+
50
+ class Arc {
51
+
52
+ private:
53
+ Label l;
54
+ Node *target;
55
+ Arc *next;
56
+
57
+ public:
58
+ void init( Label ll, Node *node ) { l=ll; target=node; };
59
+ Label label( void ) const { return l; };
60
+ Node *target_node( void ) { return target; };
61
+ const Node *target_node( void ) const { return target; };
62
+
63
+ friend class Arcs;
64
+ friend class ArcsIter;
65
+ };
66
+
67
+
68
+ /***************** class Arcs ************************************/
69
+
70
+ class Arcs {
71
+
72
+ private:
73
+ Arc *first_arcp;
74
+ Arc *first_epsilon_arcp;
75
+
76
+ public:
77
+ void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
78
+ Arcs( void ) { init(); };
79
+ Node *target_node( Label l );
80
+ const Node *target_node( Label l ) const;
81
+ void add_arc( Label, Node*, Transducer* );
82
+ int remove_arc( Arc* );
83
+ bool is_empty( void ) const { return !(first_arcp || first_epsilon_arcp); };
84
+ bool epsilon_transition_exists( void ) const { return first_epsilon_arcp != NULL; };
85
+ bool non_epsilon_transition_exists( void ) const { return first_arcp != NULL; };
86
+ int size( void ) const;
87
+
88
+ friend class ArcsIter;
89
+ };
90
+
91
+
92
+ /***************** class ArcsIter ********************************/
93
+
94
+ class ArcsIter {
95
+
96
+ // ArcsIter iterates over the arcs starting with epsilon arcs
97
+
98
+ private:
99
+ Arc *current_arcp;
100
+ Arc *more_arcs;
101
+
102
+ public:
103
+ typedef enum {all,non_eps,eps} IterType;
104
+
105
+ ArcsIter( const Arcs *arcs, IterType type=all ) {
106
+ more_arcs = NULL;
107
+ if (type == all) {
108
+ if (arcs->first_epsilon_arcp) {
109
+ current_arcp = arcs->first_epsilon_arcp;
110
+ more_arcs = arcs->first_arcp;
111
+ }
112
+ else
113
+ current_arcp = arcs->first_arcp;
114
+ }
115
+ else if (type == non_eps)
116
+ current_arcp = arcs->first_arcp;
117
+ else
118
+ current_arcp = arcs->first_epsilon_arcp;
119
+ };
120
+
121
+ void operator++( int ) {
122
+ if (current_arcp) {
123
+ current_arcp = current_arcp->next;
124
+ if (!current_arcp && more_arcs) {
125
+ current_arcp = more_arcs;
126
+ more_arcs = NULL;
127
+ }
128
+ }
129
+ };
130
+ operator Arc*( void ) { return current_arcp; };
131
+
132
+ };
133
+
134
+
135
+ /***************** class Node ************************************/
136
+
137
+ class Node {
138
+
139
+ private:
140
+ bool final;
141
+ VType visited;
142
+ Arcs arcsp;
143
+ Node *forwardp;
144
+
145
+ public:
146
+ Node( void ) { init(); };
147
+ void init( void );
148
+ bool is_final( void ) const { return final; };
149
+ void set_final( bool flag ) { final = flag; };
150
+ void set_forward( Node *node ) { forwardp = node; };
151
+ const Node *target_node( Label l ) const { return arcs()->target_node(l); };
152
+ Node *target_node( Label l ) { return arcs()->target_node(l); };
153
+ void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
154
+ Arcs *arcs( void ) { return &arcsp; };
155
+ const Arcs *arcs( void ) const { return &arcsp; };
156
+ Node *forward( void ) { return forwardp; };
157
+ bool was_visited( VType vmark ) {
158
+ if (visited == vmark)
159
+ return true;
160
+ visited = vmark;
161
+ return false;
162
+ };
163
+ bool check_visited( VType vm ) // leaves the visited flag unchanged
164
+ { return (visited==vm); };
165
+ };
166
+
167
+
168
+ /***************** class Node2Int *********************************/
169
+
170
class Node2Int {
  // Maps node pointers to integers. Nodes are hashed and compared by address.

  struct hashf {
    size_t operator()(const class Node *node) const {
      return reinterpret_cast<size_t>(node);
    }
  };
  struct equalf {
    int operator()(const class Node *n1, const class Node *n2) const {
      return (n1 == n2);
    }
  };
  typedef __gnu_cxx::hash_map<Node*, int, hashf, equalf> NL;

 private:
  int current_number;  // not used by any member of this class
  NL number;           // node -> integer table

 public:
  // Gives current_number a defined value so that copying a Node2Int never
  // reads an indeterminate int.
  Node2Int(void) : current_number(0) {}

  // Returns a reference to the integer stored for `node`. A node that has no
  // entry yet is inserted with the value 0 first. Uses the container's own
  // operator[], which performs this find-or-insert in a single lookup.
  int &operator[](Node *node) {
    return number[node];
  }
};
196
+
197
+
198
+ /***************** class NodeNumbering ****************************/
199
+
200
+ class NodeNumbering {
201
+
202
+ private:
203
+ std::vector<Node*> nodes;
204
+ Node2Int nummap;
205
+ void number_node( Node*, Transducer& );
206
+
207
+ public:
208
+ NodeNumbering( Transducer& );
209
+ int operator[]( Node *node ) { return nummap[node]; };
210
+ size_t number_of_nodes( void ) { return nodes.size(); };
211
+ Node *get_node( size_t n ) { return nodes[n]; };
212
+ };
213
+
214
+
215
+ /***************** class PairMapping ****************************/
216
+
217
class PairMapping {
  // This class is used to map a node pair from two transducers
  // to a single node in another transducer

  typedef std::pair<class Node*, class Node*> NodePair;

 private:
  // Hashes a pair by XOR-ing the two node addresses.
  struct hashf {
    size_t operator()(const NodePair p) const {
      const size_t first_bits = (size_t)p.first;
      const size_t second_bits = (size_t)p.second;
      return first_bits ^ second_bits;
    }
  };
  // Two pairs are equal iff both components are the same pointers.
  struct equalf {
    int operator()(const NodePair p1, const NodePair p2) const {
      return p1 == p2;
    }
  };
  typedef __gnu_cxx::hash_map<NodePair, Node*, hashf, equalf> PairMap;
  PairMap pm;

 public:
  typedef PairMap::iterator iterator;

  iterator begin(void) { return pm.begin(); }
  iterator end(void) { return pm.end(); }

  // Looks up the entry for the pair (n1, n2); returns end() if there is none.
  iterator find(Node *n1, Node *n2) {
    const NodePair key(n1, n2);
    return pm.find(key);
  }

  // Returns a reference to the node stored for pair p. A pair that has no
  // entry yet is inserted with a null pointer first.
  Node* &operator[](NodePair p) { return pm[p]; }

};
246
+
247
+
248
+ /***************** class Transducer *******************************/
249
+
250
+ class Transducer {
251
+
252
+ private:
253
+ bool deterministic;
254
+ bool minimised;
255
+ Node root;
256
+ Mem mem;
257
+
258
+ typedef std::set<Label, Label::label_cmp> LabelSet;
259
+ typedef __gnu_cxx::hash_map<Character, char*> SymbolMap;
260
+
261
+ void reverse_node( Node *old_node, Transducer *new_node );
262
+ Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
263
+ Node *copy_nodes( Node *n, Transducer *a,
264
+ bool lswitch=false, bool recode=false );
265
+ void rec_cat_nodes( Node*, Node* );
266
+ bool productive_node( Node* );
267
+ bool prune_nodes( Node* );
268
+ void negate_nodes( Node*, Node* );
269
+ bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
270
+ void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
271
+ void freely_insert_at_node( Node *node, Label l );
272
+ int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
273
+ bool infinitely_ambiguous_node( Node* );
274
+ bool is_cyclic_node( Node*, NodeHashSet &visited );
275
+ bool is_automaton_node( Node* );
276
+ bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
277
+ void store_symbols( Node*, SymbolMap&, LabelSet& );
278
+
279
+ void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
280
+ void splice_arc( Node*, Node*, Node*, Transducer* );
281
+ void enumerate_paths_node( Node*, std::vector<Label>&, NodeHashSet&,
282
+ std::vector<Transducer*>& );
283
+ void replace_char2( Node*, Node*, Character, Character, Transducer* );
284
+ Node *create_node( std::vector<Node*>&, char*, size_t line );
285
+ void read_transducer_binary( FILE* );
286
+ void read_transducer_text( FILE* );
287
+
288
+ public:
289
+ VType vmark;
290
+ void incr_vmark( void ) {
291
+ if (++vmark == 0)
292
+ throw "Overflow of generation counter!";
293
+ };
294
+ Alphabet alphabet; // The set of all labels, i.e. character pairs
295
+
296
+ Transducer( void ) : root(), mem()
297
+ { vmark = 0; deterministic = minimised = false; };
298
+ // convertion of a string to an transducer
299
+ Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
300
+ // reads a word list from a file and stores it in the transducer
301
+ Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
302
+ // reads a transducer from a binary or text file
303
+ Transducer( FILE*, bool binary=true );
304
+ // turns a sequence of labels into a transducer
305
+ Transducer( std::vector<Label>& );
306
+
307
+ Node *root_node( void ) { return &root; }; // returns the root node
308
+ const Node *root_node( void ) const { return &root; }; // returns the root node
309
+ Node *new_node( void ); // memory alocation for a new node
310
+ Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
311
+ void add_string( char *s, bool extended=false );
312
+ void complete_alphabet( void );
313
+ void minimise_alphabet( void );
314
+ void prune( void ); // remove unnecessary arcs
315
+
316
+ int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
317
+
318
+ bool analyze_string( char *s, FILE *file, bool with_brackets=true );
319
+ bool generate_string( char *s, FILE *file, bool with_brackets=true );
320
+ bool generate( FILE *file, bool separate=false );
321
+
322
+ void clear( void ); // clears the transducer. The resulting transducer
323
+ // is like one created with Transducer()
324
+ // copy duplicates an transducer
325
+ // if called with a non-zero argument, upper and lower level are switched
326
+ Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
327
+ Transducer &switch_levels( void ) { return copy( true ); };
328
+ Transducer &splice( Label l, Transducer *a);
329
+ Transducer &freely_insert( Label l );
330
+ Transducer &replace_char( Character c, Character nc );
331
+ Transducer &level( Level );
332
+ Transducer &lower_level( void ) // creates an transducer for the "lower" language
333
+ { return level(lower); };
334
+ Transducer &upper_level( void ) // creates an transducer for the "upper" language
335
+ { return level(upper); };
336
+ Transducer &determinise( void ); // creates a deterministic transducer
337
+ Transducer &minimise( bool verbose=true ); // creates a minimised transducer
338
+ void store( FILE* ); // stores the transducer in binary format
339
+ void store_lowmem( FILE* );
340
+ void read( FILE* ); // reads an transducer in binary format
341
+ bool enumerate_paths( std::vector<Transducer*>& );
342
+
343
+ Transducer &reverse( void ); // reverse language
344
+ Transducer &operator|( Transducer& ); // union, disjunction
345
+ Transducer &operator+( Transducer& ); // concatenation
346
+ Transducer &operator/( Transducer& ); // subtraction
347
+ Transducer &operator&( Transducer& ); // intersection, conjunction
348
+ Transducer &operator||( Transducer& ); // composition
349
+ Transducer &operator!( void ); // complement, negation
350
+ Transducer &kleene_star( void );
351
+ bool operator==( Transducer& ); // minimises its arguments first
352
+
353
+ bool is_cyclic( void );
354
+ bool is_automaton( void );
355
+ bool is_infinitely_ambiguous( void );
356
+ bool is_empty( void ); // For efficiency reasons, these functions
357
+ bool generates_empty_string( void );// are better called after minimisation
358
+
359
+ friend class NodeNumbering;
360
+ friend class EdgeCount;
361
+ friend class MakeCompactTransducer;
362
+ friend std::ostream &operator<<(std::ostream&, Transducer&);
363
+ };
364
+
365
+ #endif