ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -1,369 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE fst.h */
4
- /* MODULE fst */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE finite state tools */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _FST_H_
13
- #define _FST_H_
14
-
15
- #include "alphabet.h"
16
-
17
-
18
- /*******************************************************************/
19
- /* include commands */
20
- /*******************************************************************/
21
-
22
- #include <string>
23
-
24
- #include <vector>
25
-
26
- #include "mem.h"
27
-
28
- typedef unsigned short VType;
29
-
30
- extern int Quiet;
31
-
32
- class Node;
33
- class Arc;
34
- class Arcs;
35
- class Transducer;
36
-
37
-
38
- struct hashf {
39
- size_t operator()(const Node *n) const { return (size_t) n; }
40
- };
41
- struct equalf {
42
- int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
43
- };
44
- typedef hash_set<Node*, hashf, equalf> NodeHashSet;
45
-
46
-
47
- /***************** class Arc *************************************/
48
-
49
- class Arc {
50
-
51
- private:
52
- Label l;
53
- Node *target;
54
- Arc *next;
55
-
56
- public:
57
- void init( Label ll, Node *node ) { l=ll; target=node; };
58
- Label label( void ) const { return l; };
59
- Node *target_node( void ) { return target; };
60
- const Node *target_node( void ) const { return target; };
61
-
62
- friend class Arcs;
63
- friend class ArcsIter;
64
- };
65
-
66
-
67
- /***************** class Arcs ************************************/
68
-
69
- class Arcs {
70
-
71
- private:
72
- Arc *first_arcp;
73
- Arc *first_epsilon_arcp;
74
-
75
- public:
76
- void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
77
- Arcs( void ) { init(); };
78
- Node *target_node( Label l );
79
- const Node *target_node( Label l ) const;
80
- void add_arc( Label, Node*, Transducer* );
81
- int remove_arc( Arc* );
82
- bool is_empty( void ) const { return !(first_arcp || first_epsilon_arcp); };
83
- bool epsilon_transition_exists( void ) const { return first_epsilon_arcp != NULL; };
84
- bool non_epsilon_transition_exists( void ) const { return first_arcp != NULL; };
85
- int size( void ) const;
86
-
87
- friend class ArcsIter;
88
- };
89
-
90
-
91
- /***************** class ArcsIter ********************************/
92
-
93
- class ArcsIter {
94
-
95
- // ArcsIter iterates over the arcs starting with epsilon arcs
96
-
97
- private:
98
- Arc *current_arcp;
99
- Arc *more_arcs;
100
-
101
- public:
102
- typedef enum {all,non_eps,eps} IterType;
103
-
104
- ArcsIter( const Arcs *arcs, IterType type=all ) {
105
- more_arcs = NULL;
106
- if (type == all) {
107
- if (arcs->first_epsilon_arcp) {
108
- current_arcp = arcs->first_epsilon_arcp;
109
- more_arcs = arcs->first_arcp;
110
- }
111
- else
112
- current_arcp = arcs->first_arcp;
113
- }
114
- else if (type == non_eps)
115
- current_arcp = arcs->first_arcp;
116
- else
117
- current_arcp = arcs->first_epsilon_arcp;
118
- };
119
-
120
- void operator++( int ) {
121
- if (current_arcp) {
122
- current_arcp = current_arcp->next;
123
- if (!current_arcp && more_arcs) {
124
- current_arcp = more_arcs;
125
- more_arcs = NULL;
126
- }
127
- }
128
- };
129
- operator Arc*( void ) { return current_arcp; };
130
-
131
- };
132
-
133
-
134
- /***************** class Node ************************************/
135
-
136
- class Node {
137
-
138
- private:
139
- bool final;
140
- VType visited;
141
- Arcs arcsp;
142
- Node *forwardp;
143
-
144
- public:
145
- Node( void ) { init(); };
146
- void init( void );
147
- bool is_final( void ) const { return final; };
148
- void set_final( bool flag ) { final = flag; };
149
- void set_forward( Node *node ) { forwardp = node; };
150
- const Node *target_node( Label l ) const { return arcs()->target_node(l); };
151
- Node *target_node( Label l ) { return arcs()->target_node(l); };
152
- void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
153
- Arcs *arcs( void ) { return &arcsp; };
154
- const Arcs *arcs( void ) const { return &arcsp; };
155
- Node *forward( void ) { return forwardp; };
156
- void clear_visited( NodeHashSet &nodeset );
157
- bool was_visited( VType vmark ) {
158
- if (visited == vmark)
159
- return true;
160
- visited = vmark;
161
- return false;
162
- };
163
- bool check_visited( VType vm ) // leaves the visited flag unchanged
164
- { return (visited==vm); };
165
- };
166
-
167
-
168
- /***************** class Node2Int *********************************/
169
-
170
- class Node2Int {
171
-
172
- struct hashf {
173
- size_t operator()(const Node *node) const {
174
- return (size_t)node;
175
- }
176
- };
177
- struct equalf {
178
- int operator()(const Node *n1, const Node *n2) const {
179
- return (n1 == n2);
180
- }
181
- };
182
- typedef hash_map<Node*, int, hashf, equalf> NL;
183
-
184
- private:
185
- int current_number;
186
- NL number;
187
-
188
- public:
189
- int &operator[]( Node *node ) {
190
- NL::iterator it=number.find(node);
191
- if (it == number.end())
192
- return number.insert(NL::value_type(node, 0)).first->second;
193
- return it->second;
194
- };
195
- };
196
-
197
-
198
- /***************** class NodeNumbering ****************************/
199
-
200
- class NodeNumbering {
201
-
202
- private:
203
- std::vector<Node*> nodes;
204
- Node2Int nummap;
205
- void number_node( Node*, Transducer& );
206
-
207
- public:
208
- NodeNumbering( Transducer& );
209
- int operator[]( Node *node ) { return nummap[node]; };
210
- size_t number_of_nodes( void ) { return nodes.size(); };
211
- Node *get_node( size_t n ) { return nodes[n]; };
212
- };
213
-
214
-
215
- /***************** class PairMapping ****************************/
216
-
217
- class PairMapping {
218
- // This class is used to map a node pair from two transducers
219
- // to a single node in another transducer
220
-
221
- typedef std::pair<Node*, Node*> NodePair;
222
-
223
- private:
224
- struct hashf {
225
- size_t operator()(const NodePair p) const {
226
- return (size_t)p.first ^ (size_t)p.second;
227
- }
228
- };
229
- struct equalf {
230
- int operator()(const NodePair p1, const NodePair p2) const {
231
- return (p1.first==p2.first && p1.second == p2.second);
232
- }
233
- };
234
- typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
235
- PairMap pm;
236
-
237
- public:
238
- typedef PairMap::iterator iterator;
239
- iterator begin( void ) { return pm.begin(); };
240
- iterator end( void ) { return pm.end(); };
241
- iterator find( Node *n1, Node *n2 )
242
- { return pm.find( NodePair(n1,n2) ); };
243
- Node* &operator[]( NodePair p ) { return pm.operator[](p); };
244
-
245
- };
246
-
247
-
248
- /***************** class Transducer *******************************/
249
-
250
- class Transducer {
251
-
252
- private:
253
- bool deterministic;
254
- bool minimised;
255
- Node root;
256
- Mem mem;
257
-
258
- typedef std::set<Label, Label::label_cmp> LabelSet;
259
- typedef hash_map<Character, char*> SymbolMap;
260
-
261
- void incr_vmark( void ) {
262
- if (++vmark == 0) {
263
- NodeHashSet nodes;
264
- root.clear_visited( nodes );
265
- fprintf(stderr,"clearing flags\n");
266
- vmark = 1;
267
- }
268
- };
269
- void reverse_node( Node *old_node, Transducer *new_node );
270
- Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
271
- Node *copy_nodes( Node *n, Transducer *a,
272
- bool lswitch=false, bool recode=false );
273
- void rec_cat_nodes( Node*, Node* );
274
- bool productive_node( Node* );
275
- bool prune_nodes( Node* );
276
- void negate_nodes( Node*, Node* );
277
- bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
278
- void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
279
- void freely_insert_at_node( Node *node, Label l );
280
- int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
281
- bool infinitely_ambiguous_node( Node* );
282
- bool is_cyclic_node( Node*, NodeHashSet &visited );
283
- bool is_automaton_node( Node* );
284
- bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
285
- void store_symbols( Node*, SymbolMap&, LabelSet& );
286
-
287
- void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
288
- void splice_arc( Node*, Node*, Node*, Transducer* );
289
- void enumerate_paths_node( Node*, std::vector<Label>&, NodeHashSet&,
290
- std::vector<Transducer*>& );
291
- void replace_char2( Node*, Node*, Character, Character, Transducer* );
292
- Node *create_node( std::vector<Node*>&, char*, size_t line );
293
- void read_transducer_binary( FILE* );
294
- void read_transducer_text( FILE* );
295
-
296
- public:
297
- VType vmark;
298
- Alphabet alphabet; // The set of all labels, i.e. character pairs
299
-
300
- Transducer( void ) : root(), mem()
301
- { vmark = 0; deterministic = minimised = false; };
302
- // convertion of a string to an transducer
303
- Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
304
- // reads a word list from a file and stores it in the transducer
305
- Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
306
- // reads a transducer from a binary or text file
307
- Transducer( FILE*, bool binary=true );
308
- // turns a sequence of labels into a transducer
309
- Transducer( std::vector<Label>& );
310
-
311
- Node *root_node( void ) { return &root; }; // returns the root node
312
- const Node *root_node( void ) const { return &root; }; // returns the root node
313
- Node *new_node( void ); // memory alocation for a new node
314
- Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
315
- void add_string( char *s, bool extended=false, Alphabet *a=NULL );
316
- void complete_alphabet( void );
317
- void minimise_alphabet( void );
318
- void prune( void ); // remove unnecessary arcs
319
-
320
- int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
321
-
322
- bool analyze_string( char *s, FILE *file, bool with_brackets=true );
323
- bool generate_string( char *s, FILE *file, bool with_brackets=true );
324
- bool generate( FILE *file, bool separate=false );
325
-
326
- void clear( void ); // clears the transducer. The resulting transducer
327
- // is like one created with Transducer()
328
- // copy duplicates an transducer
329
- // if called with a non-zero argument, upper and lower level are switched
330
- Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
331
- Transducer &switch_levels( void ) { return copy( true ); };
332
- Transducer &splice( Label l, Transducer *a);
333
- Transducer &freely_insert( Label l );
334
- Transducer &replace_char( Character c, Character nc );
335
- Transducer &level( Level );
336
- Transducer &lower_level( void ) // creates an transducer for the "lower" language
337
- { return level(lower); };
338
- Transducer &upper_level( void ) // creates an transducer for the "upper" language
339
- { return level(upper); };
340
- Transducer &determinise( void ); // creates a deterministic transducer
341
- Transducer &minimise( bool verbose=true ); // creates a minimised transducer
342
- void store( FILE* ); // stores the transducer in binary format
343
- void store_lowmem( FILE* );
344
- void read( FILE* ); // reads an transducer in binary format
345
- bool enumerate_paths( std::vector<Transducer*>& );
346
-
347
- Transducer &reverse( void ); // reverse language
348
- Transducer &operator|( Transducer& ); // union, disjunction
349
- Transducer &operator+( Transducer& ); // concatenation
350
- Transducer &operator/( Transducer& ); // subtraction
351
- Transducer &operator&( Transducer& ); // intersection, conjunction
352
- Transducer &operator||( Transducer& ); // composition
353
- Transducer &operator!( void ); // complement, negation
354
- Transducer &kleene_star( void );
355
- bool operator==( Transducer& ); // minimises its arguments first
356
-
357
- bool is_cyclic( void );
358
- bool is_automaton( void );
359
- bool is_infinitely_ambiguous( void );
360
- bool is_empty( void ); // For efficiency reasons, these functions
361
- bool generates_empty_string( void );// are better called after minimisation
362
-
363
- friend class NodeNumbering;
364
- friend class EdgeCount;
365
- friend class MakeCompactTransducer;
366
- friend std::ostream &operator<<(std::ostream&, Transducer&);
367
- };
368
-
369
- #endif
@@ -1,1842 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE interface.C */
4
- /* MODULE interface */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /*******************************************************************/
9
-
10
- #include "interface.h"
11
-
12
- #include <fstream>
13
- using std::ifstream;
14
- using std::ofstream;
15
-
16
- #include <set>
17
- using std::set;
18
-
19
- #include "sgi.h"
20
-
21
- using std::cerr;
22
- using std::cout;
23
- using std::vector;
24
-
25
// Strict weak ordering on C strings: true iff `lhs` sorts before `rhs`
// according to strcmp.
struct ltstr {
  bool operator()( const char *lhs, const char *rhs ) const
  {
    const int order = strcmp(lhs, rhs);
    return order < 0;
  }
};
29
-
30
// Equality on C strings: true iff strcmp reports identical contents.
struct eqstr {
  bool operator()( const char *lhs, const char *rhs ) const
  {
    const int order = strcmp(lhs, rhs);
    return order == 0;
  }
};
34
-
35
- typedef set<char*, ltstr> RVarSet; // set of C-string names ordered with ltstr (strcmp)
36
-
37
- typedef hash_map<char*, Transducer*, hash<const char*>, eqstr> VarMap; // name -> transducer, keys compared with eqstr
38
-
39
- typedef hash_map<char*, Range*, hash<const char*>, eqstr> SVarMap; // name -> value range, keys compared with eqstr
40
-
41
- bool Verbose=false; // when true, read_words/read_transducer report progress on stderr
42
-
43
- Alphabet TheAlphabet; // alphabet shared by all functions in this file
44
-
45
- static VarMap VM; // transducer variables (written by def_var, read by var_value)
46
- static SVarMap SVM; // range variables (written by def_svar, read by svar_value)
47
- static RVarSet RS; // names recorded by rvar_value; consumed and cleared by explode
48
- static RVarSet RSS; // names recorded by rsvar_value; consumed and cleared by explode
49
- static int Alphabet_Defined=0; // set to 1 by def_alphabet; make_transducer requires it for the wildcard
50
-
51
-
52
- /*******************************************************************/
53
- /* */
54
- /* error */
55
- /* */
56
- /*******************************************************************/
57
-
58
- static void error( const char *message )
59
-
60
- {
61
- cerr << "\nError: " << message << "\naborted.\n";
62
- exit(1);
63
- }
64
-
65
-
66
- /*******************************************************************/
67
- /* */
68
- /* error2 */
69
- /* */
70
- /*******************************************************************/
71
-
72
- void error2( char *message, char *input )
73
-
74
- {
75
- cerr << "\nError: " << message << ": " << input << "\naborted.\n";
76
- exit(1);
77
- }
78
-
79
-
80
- /*******************************************************************/
81
- /* */
82
- /* symbol_code */
83
- /* */
84
- /*******************************************************************/
85
-
86
- Character symbol_code( char *symbol )
87
-
88
- {
89
- int c=TheAlphabet.symbol2code(symbol);
90
- if (c == EOF)
91
- c = TheAlphabet.add_symbol( symbol );
92
- free(symbol);
93
- return (Character)c;
94
- }
95
-
96
-
97
- /*******************************************************************/
98
- /* */
99
- /* character_code */
100
- /* */
101
- /*******************************************************************/
102
-
103
- Character character_code( unsigned int uc )
104
-
105
- {
106
- if (TheAlphabet.utf8)
107
- return symbol_code(fst_strdup(int2utf8(uc)));
108
-
109
- unsigned char *buffer=(unsigned char*)malloc(2);
110
- buffer[0] = (unsigned char)uc;
111
- buffer[1] = 0;
112
- return symbol_code((char*)buffer);
113
- }
114
-
115
-
116
- /*******************************************************************/
117
- /* */
118
- /* add_value */
119
- /* */
120
- /*******************************************************************/
121
-
122
- Range *add_value( Character c, Range *r )
123
-
124
- {
125
- Range *result=new Range;
126
- result->character = c;
127
- result->next = r;
128
- return result;
129
- }
130
-
131
-
132
- /*******************************************************************/
133
- /* */
134
- /* add_values */
135
- /* */
136
- /*******************************************************************/
137
-
138
- Range *add_values( unsigned int c1, unsigned int c2, Range *r )
139
-
140
- {
141
- for( unsigned int c=c2; c>=c1; c-- )
142
- r = add_value(character_code(c), r);
143
- return r;
144
- }
145
-
146
-
147
- /*******************************************************************/
148
- /* */
149
- /* append_values */
150
- /* */
151
- /*******************************************************************/
152
-
153
- Range *append_values( Range *r2, Range *r )
154
-
155
- {
156
- if (r2 == NULL)
157
- return r;
158
- return add_value(r2->character, append_values(r2->next, r));
159
- }
160
-
161
-
162
- /*******************************************************************/
163
- /* */
164
- /* add_var_values */
165
- /* */
166
- /*******************************************************************/
167
-
168
- Range *add_var_values( char *name, Range *r )
169
-
170
- {
171
- return append_values(svar_value(name), r);
172
- }
173
-
174
-
175
- /*******************************************************************/
176
- /* */
177
- /* in_range */
178
- /* */
179
- /*******************************************************************/
180
-
181
- static bool in_range( unsigned int c, Range *r )
182
-
183
- {
184
- while (r) {
185
- if (r->character == c)
186
- return true;
187
- r = r->next;
188
- }
189
- return false;
190
- }
191
-
192
-
193
- /*******************************************************************/
194
- /* */
195
- /* free_values */
196
- /* */
197
- /*******************************************************************/
198
-
199
- static void free_values( Range *r )
200
-
201
- {
202
- if (r) {
203
- free_values(r->next);
204
- delete r;
205
- }
206
- }
207
-
208
-
209
- /*******************************************************************/
210
- /* */
211
- /* free_values */
212
- /* */
213
- /*******************************************************************/
214
-
215
- static void free_values( Ranges *r )
216
-
217
- {
218
- if (r) {
219
- free_values(r->next);
220
- delete r;
221
- }
222
- }
223
-
224
-
225
- /*******************************************************************/
226
- /* */
227
- /* free_contexts */
228
- /* */
229
- /*******************************************************************/
230
-
231
- static void free_contexts( Contexts *c )
232
-
233
- {
234
- if (c) {
235
- free_contexts(c->next);
236
- delete c;
237
- }
238
- }
239
-
240
-
241
- /*******************************************************************/
242
- /* */
243
- /* copy_values */
244
- /* */
245
- /*******************************************************************/
246
-
247
- static Range *copy_values( const Range *r )
248
-
249
- {
250
- if (r == NULL)
251
- return NULL;
252
- return add_value( r->character, copy_values(r->next));
253
- }
254
-
255
-
256
- /*******************************************************************/
257
- /* */
258
- /* complement_range */
259
- /* */
260
- /*******************************************************************/
261
-
262
- Range *complement_range( Range *r )
263
-
264
- {
265
- vector<Character> sym;
266
- for( Range *p=r; p; p=p->next)
267
- sym.push_back( p->character );
268
- free_values( r );
269
-
270
- TheAlphabet.complement(sym);
271
- if (sym.size() == 0)
272
- error("Empty character range!");
273
-
274
-
275
- Range *result=NULL;
276
- for( size_t i=0; i<sym.size(); i++ ) {
277
- Range *tmp = new Range;
278
- tmp->character = sym[i];
279
- tmp->next = result;
280
- result = tmp;
281
- }
282
-
283
- return result;
284
- }
285
-
286
-
287
- /*******************************************************************/
288
- /* */
289
- /* make_transducer */
290
- /* */
291
- /*******************************************************************/
292
-
293
- static Transducer *make_transducer( Range *r1, Range *r2 )
294
-
295
- {
296
- Transducer *t=new Transducer();
297
- Node *node=t->new_node();
298
- node->set_final(1);
299
-
300
- if (r1 == NULL || r2 == NULL) {
301
- if (!Alphabet_Defined)
302
- error("The wildcard symbol '.' requires the definition of an alphabet");
303
-
304
- // one of the ranges was '.'
305
- for(Alphabet::const_iterator it=TheAlphabet.begin();
306
- it!=TheAlphabet.end(); it++)
307
- if ((r1 == NULL || in_range(it->lower_char(), r1)) &&
308
- (r2 == NULL || in_range(it->upper_char(), r2)))
309
- t->root_node()->add_arc( *it, node, t );
310
- }
311
- else {
312
- for (;;) {
313
- Label l(r1->character, r2->character);
314
- // TheAlphabet.insert(l);
315
- t->root_node()->add_arc( l, node, t );
316
- if (!r1->next && !r2->next)
317
- break;
318
- if (r1->next)
319
- r1 = r1->next;
320
- if (r2->next)
321
- r2 = r2->next;
322
- }
323
- }
324
-
325
- return t;
326
- }
327
-
328
-
329
- /*******************************************************************/
330
- /* */
331
- /* empty_transducer */
332
- /* */
333
- /*******************************************************************/
334
-
335
- static Transducer *empty_transducer()
336
-
337
- {
338
- Transducer *t=new Transducer();
339
- t->root_node()->set_final(1);
340
-
341
- return t;
342
- }
343
-
344
-
345
- /*******************************************************************/
346
- /* */
347
- /* one_label_transducer */
348
- /* */
349
- /*******************************************************************/
350
-
351
- static Transducer *one_label_transducer( Label l )
352
-
353
- {
354
- Transducer *t = new Transducer();
355
- Node *last = t->new_node();
356
- t->root_node()->add_arc( l, last, t );
357
- last->set_final(1);
358
-
359
- return t;
360
- }
361
-
362
-
363
- /*******************************************************************/
364
- /* */
365
- /* new_transducer */
366
- /* */
367
- /*******************************************************************/
368
-
369
- Transducer *new_transducer( Range *r1, Range *r2 )
370
-
371
- {
372
- Transducer *t=make_transducer( r1, r2);
373
- if (r1 != r2)
374
- free_values(r1);
375
- free_values(r2);
376
- return t;
377
- }
378
-
379
-
380
- /*******************************************************************/
381
- /* */
382
- /* read_words */
383
- /* */
384
- /*******************************************************************/
385
-
386
- Transducer *read_words( char *filename )
387
-
388
- {
389
- if (Verbose)
390
- fprintf(stderr,"\nreading words from %s...", filename);
391
- ifstream is(filename);
392
- if (!is.is_open()) {
393
- static char message[1000];
394
- sprintf(message,"Error: Cannot open file \"%s\"!", filename);
395
- throw message;
396
- }
397
- free( filename );
398
- Transducer *t = new Transducer(is, &TheAlphabet, Verbose);
399
- is.close();
400
- TheAlphabet.insert_symbols(t->alphabet);
401
- if (Verbose)
402
- fprintf(stderr,"finished\n");
403
- return t;
404
- }
405
-
406
-
407
- /*******************************************************************/
408
- /* */
409
- /* read_transducer */
410
- /* */
411
- /*******************************************************************/
412
-
413
- Transducer *read_transducer( char *filename )
414
-
415
- {
416
- if (Verbose)
417
- fprintf(stderr,"\nreading transducer from %s...", filename);
418
- FILE *file = fopen(filename,"rb");
419
- if (file == NULL) {
420
- static char message[1000];
421
- sprintf(message,"Error: Cannot open file \"%s\"!",filename);
422
- throw message;
423
- }
424
- Transducer t(file);
425
- fclose(file);
426
- if (t.alphabet.utf8 != TheAlphabet.utf8) {
427
- static char message[1000];
428
- sprintf(message,"Error: incompatible character encoding in file \"%s\"!",
429
- filename);
430
- throw message;
431
- }
432
- free( filename );
433
- Transducer *nt = &t.copy(false, &TheAlphabet);
434
- TheAlphabet.insert_symbols(nt->alphabet);
435
- if (Verbose)
436
- fprintf(stderr,"finished\n");
437
- return nt;
438
- }
439
-
440
-
441
- /*******************************************************************/
442
- /* */
443
- /* def_alphabet */
444
- /* */
445
- /*******************************************************************/
446
-
447
- void def_alphabet( Transducer *t )
448
-
449
- {
450
- t = explode(t);
451
- t = minimise(t);
452
- t->alphabet.clear_char_pairs();
453
- t->complete_alphabet();
454
- TheAlphabet.clear_char_pairs();
455
- TheAlphabet.copy(t->alphabet);
456
- Alphabet_Defined = 1;
457
- delete t;
458
- }
459
-
460
-
461
- /*******************************************************************/
462
- /* */
463
- /* def_svar definition of a value range variable */
464
- /* */
465
- /*******************************************************************/
466
-
467
- bool def_svar( char *name, Range *r )
468
-
469
- {
470
- // delete the old value of the variable
471
- SVarMap::iterator it=SVM.find(name);
472
- if (it != SVM.end()) {
473
- char *n=it->first;
474
- Range *v=it->second;
475
- SVM.erase(it);
476
- delete v;
477
- free(n);
478
- }
479
- SVM[name] = r;
480
- return r == NULL;
481
- }
482
-
483
-
484
- /*******************************************************************/
485
- /* */
486
- /* svar_value */
487
- /* */
488
- /*******************************************************************/
489
-
490
- Range *svar_value( char *name )
491
-
492
- {
493
- SVarMap::iterator it=SVM.find(name);
494
- if (it == SVM.end())
495
- error2("undefined variable", name);
496
- free(name);
497
- return copy_values(it->second);
498
- }
499
-
500
-
501
- /*******************************************************************/
502
- /* */
503
- /* rsvar_value */
504
- /* */
505
- /*******************************************************************/
506
-
507
- Range *rsvar_value( char *name )
508
-
509
- {
510
- if (RSS.find(name) == RSS.end())
511
- RSS.insert(fst_strdup(name));
512
- return add_value(symbol_code(name), NULL);
513
- }
514
-
515
-
516
- /*******************************************************************/
517
- /* */
518
- /* def_var definition of an transducer variable */
519
- /* */
520
- /*******************************************************************/
521
-
522
- bool def_var( char *name, Transducer *t )
523
-
524
- {
525
- // delete the old value of the variable
526
- VarMap::iterator it=VM.find(name);
527
- if (it != VM.end()) {
528
- char *n=it->first;
529
- Transducer *v=it->second;
530
- VM.erase(it);
531
- delete v;
532
- free(n);
533
- }
534
-
535
- t = explode(t);
536
- t = minimise(t);
537
-
538
- VM[name] = t;
539
- return t->is_empty();
540
- }
541
-
542
-
543
- /*******************************************************************/
544
- /* */
545
- /* def_rvar definition of an agreement variable for automata */
546
- /* */
547
- /*******************************************************************/
548
-
549
- bool def_rvar( char *name, Transducer *t )
550
-
551
- {
552
- if (t->is_cyclic())
553
- error2("cyclic transducer assigned to", name);
554
- return def_var( name, t );
555
- }
556
-
557
-
558
- /*******************************************************************/
559
- /* */
560
- /* var_value */
561
- /* */
562
- /*******************************************************************/
563
-
564
- Transducer *var_value( char *name )
565
-
566
- {
567
- VarMap::iterator it=VM.find(name);
568
- if (it == VM.end())
569
- error2("undefined variable", name);
570
- free(name);
571
- return &(it->second->copy());
572
- }
573
-
574
-
575
- /*******************************************************************/
576
- /* */
577
- /* rvar_value */
578
- /* */
579
- /*******************************************************************/
580
-
581
- Transducer *rvar_value( char *name )
582
-
583
- {
584
- if (RS.find(name) == RS.end())
585
- RS.insert(fst_strdup(name));
586
- Range *r=add_value(symbol_code(name), NULL);
587
- return new_transducer(r,r);
588
- }
589
-
590
-
591
- /*******************************************************************/
592
- /* */
593
- /* explode */
594
- /* */
595
- /*******************************************************************/
596
-
597
- Transducer *explode( Transducer *t )
598
-
599
- {
600
- if (RS.size() == 0 && RSS.size() == 0)
601
- return t;
602
-
603
- t = minimise(t);
604
-
605
- vector<char*> name;
606
- for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
607
- name.push_back(*it);
608
- RS.clear();
609
-
610
- // replace all agreement variables
611
- for( size_t i=0; i<name.size(); i++ ) {
612
- Transducer *nt = NULL;
613
- Label l(TheAlphabet.symbol2code(name[i]));
614
- Transducer *vt=var_value(name[i]);
615
-
616
- // enumerate all paths of the transducer
617
- vector<Transducer*> it;
618
- vt->enumerate_paths(it);
619
- delete vt;
620
-
621
- // insert each path
622
- for( size_t i=0; i<it.size(); i++ ) {
623
-
624
- // insertion
625
- Transducer *t1 = &t->splice(l, it[i]);
626
- delete it[i];
627
-
628
- if (nt == NULL)
629
- nt = t1;
630
- else
631
- nt = disjunction(nt, t1);
632
- }
633
- delete t;
634
- t = nt;
635
- }
636
-
637
- name.clear();
638
- for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
639
- name.push_back(*it);
640
- RSS.clear();
641
-
642
- // replace all agreement variables
643
- for( size_t i=0; i<name.size(); i++ ) {
644
- Transducer *nt = NULL;
645
- Character c=TheAlphabet.symbol2code(name[i]);
646
- Range *r=svar_value(name[i]);
647
-
648
- // insert each character
649
- while (r != NULL) {
650
-
651
- // insertion
652
- Transducer *t1 = &t->replace_char(c, r->character);
653
-
654
- if (nt == NULL)
655
- nt = t1;
656
- else
657
- nt = disjunction(nt, t1);
658
-
659
- Range *next = r->next;
660
- delete r;
661
- r = next;
662
- }
663
- delete t;
664
- t = nt;
665
- }
666
-
667
- return t;
668
- }
669
-
670
-
671
- /*******************************************************************/
672
- /* */
673
- /* catenate */
674
- /* */
675
- /*******************************************************************/
676
-
677
- Transducer *catenate( Transducer *t1, Transducer *t2 )
678
-
679
- {
680
- Transducer *t = &(*t1 + *t2);
681
- delete t1;
682
- delete t2;
683
- return t;
684
- }
685
-
686
-
687
- /*******************************************************************/
688
- /* */
689
- /* add_range */
690
- /* */
691
- /*******************************************************************/
692
-
693
- Ranges *add_range( Range *r, Ranges *l )
694
-
695
- {
696
- Ranges *result = new Ranges;
697
- result->range = r;
698
- result->next = l;
699
- return result;
700
- }
701
-
702
-
703
- /*******************************************************************/
704
- /* */
705
- /* make_mapping */
706
- /* */
707
- /*******************************************************************/
708
-
709
- Transducer *make_mapping( Ranges *list1, Ranges *list2 )
710
-
711
- {
712
- Ranges *l1=list1;
713
- Ranges *l2=list2;
714
- Transducer *t=new Transducer();
715
-
716
- Node *node=t->root_node();
717
- while (l1 && l2) {
718
- Node *nn=t->new_node();
719
- for( Range *r1=l1->range; r1; r1=r1->next )
720
- for( Range *r2=l2->range; r2; r2=r2->next )
721
- node->add_arc( Label(r1->character, r2->character), nn, t );
722
- node = nn;
723
- l1 = l1->next;
724
- l2 = l2->next;
725
- }
726
- while (l1) {
727
- Node *nn=t->new_node();
728
- for( Range *r1=l1->range; r1; r1=r1->next )
729
- node->add_arc( Label(r1->character, Label::epsilon), nn, t );
730
- node = nn;
731
- l1 = l1->next;
732
- }
733
- while (l2) {
734
- Node *nn=t->new_node();
735
- for( Range *r2=l2->range; r2; r2=r2->next )
736
- node->add_arc( Label(Label::epsilon, r2->character), nn, t );
737
- node = nn;
738
- l2 = l2->next;
739
- }
740
- node->set_final(1);
741
-
742
- free_values(list1);
743
- free_values(list2);
744
- return t;
745
- }
746
-
747
-
748
- /*******************************************************************/
749
- /* */
750
- /* disjunction */
751
- /* */
752
- /*******************************************************************/
753
-
754
- Transducer *disjunction( Transducer *t1, Transducer *t2 )
755
-
756
- {
757
- Transducer *t = &(*t1 | *t2);
758
- delete t1;
759
- delete t2;
760
- return t;
761
- }
762
-
763
-
764
- /*******************************************************************/
765
- /* */
766
- /* conjunction */
767
- /* */
768
- /*******************************************************************/
769
-
770
- Transducer *conjunction( Transducer *t1, Transducer *t2 )
771
-
772
- {
773
- if (RS.size() > 0 || RSS.size() > 0)
774
- cerr << "\nWarning: agreement operation inside of conjunction!\n";
775
- Transducer *t = &(*t1 & *t2);
776
- delete t1;
777
- delete t2;
778
- return t;
779
- }
780
-
781
-
782
- /*******************************************************************/
783
- /* */
784
- /* subtraction */
785
- /* */
786
- /*******************************************************************/
787
-
788
- Transducer *subtraction( Transducer *t1, Transducer *t2 )
789
-
790
- {
791
- if (RS.size() > 0 || RSS.size() > 0)
792
- cerr << "\nWarning: agreement operation inside of conjunction!\n";
793
- Transducer *t = &(*t1 / *t2);
794
- delete t1;
795
- delete t2;
796
- return t;
797
- }
798
-
799
-
800
- /*******************************************************************/
801
- /* */
802
- /* composition */
803
- /* */
804
- /*******************************************************************/
805
-
806
- Transducer *composition( Transducer *t1, Transducer *t2 )
807
-
808
- {
809
- if (RS.size() > 0 || RSS.size() > 0)
810
- cerr << "\nWarning: agreement operation inside of composition!\n";
811
- Transducer *t = &(*t1 || *t2);
812
- delete t1;
813
- delete t2;
814
- return t;
815
- }
816
-
817
- /*******************************************************************/
818
- /* */
819
- /* freely_insert */
820
- /* */
821
- /*******************************************************************/
822
-
823
- Transducer *freely_insert( Transducer *t, Character lc, Character uc )
824
-
825
- {
826
- return &t->freely_insert(Label(lc,uc));
827
- }
828
-
829
-
830
- /*******************************************************************/
831
- /* */
832
- /* negation */
833
- /* */
834
- /*******************************************************************/
835
-
836
- Transducer *negation( Transducer *t )
837
-
838
- {
839
- if (RS.size() > 0 || RSS.size() > 0)
840
- cerr << "\nWarning: agreement operation inside of negation!\n";
841
- if (!Alphabet_Defined)
842
- error("Negation requires the definition of an alphabet");
843
- t->alphabet.clear_char_pairs();
844
- t->alphabet.copy(TheAlphabet);
845
- Transducer *nt = &(!*t);
846
- delete t;
847
- return nt;
848
- }
849
-
850
-
851
- /*******************************************************************/
852
- /* */
853
- /* upper_level */
854
- /* */
855
- /*******************************************************************/
856
-
857
- Transducer *upper_level( Transducer *t )
858
-
859
- {
860
- Transducer *nt = &t->upper_level();
861
- delete t;
862
- return nt;
863
- }
864
-
865
-
866
- /*******************************************************************/
867
- /* */
868
- /* lower_level */
869
- /* */
870
- /*******************************************************************/
871
-
872
- Transducer *lower_level( Transducer *t )
873
-
874
- {
875
- Transducer *nt = &t->lower_level();
876
- delete t;
877
- return nt;
878
- }
879
-
880
-
881
- /*******************************************************************/
882
- /* */
883
- /* minimise */
884
- /* */
885
- /*******************************************************************/
886
-
887
- Transducer *minimise( Transducer *t )
888
-
889
- {
890
- t->alphabet.copy(TheAlphabet);
891
- Transducer *nt = &t->minimise( Verbose );
892
- delete t;
893
- return nt;
894
- }
895
-
896
-
897
- /*******************************************************************/
898
- /* */
899
- /* switch_levels */
900
- /* */
901
- /*******************************************************************/
902
-
903
- Transducer *switch_levels( Transducer *t )
904
-
905
- {
906
- Transducer *nt = &t->switch_levels();
907
- delete t;
908
- return nt;
909
- }
910
-
911
-
912
- /*******************************************************************/
913
- /* */
914
- /* repetition */
915
- /* */
916
- /*******************************************************************/
917
-
918
- Transducer *repetition( Transducer *t )
919
-
920
- {
921
- Transducer *nt = &(t->kleene_star());
922
- delete t;
923
- return nt;
924
- }
925
-
926
-
927
- /*******************************************************************/
928
- /* */
929
- /* repetition2 */
930
- /* */
931
- /*******************************************************************/
932
-
933
- Transducer *repetition2( Transducer *t )
934
-
935
- {
936
- Transducer *t1 = &(t->kleene_star());
937
- Transducer *nt = &(*t + *t1);
938
- delete t;
939
- delete t1;
940
- return nt;
941
- }
942
-
943
-
944
- /*******************************************************************/
945
- /* */
946
- /* optional */
947
- /* */
948
- /*******************************************************************/
949
-
950
- Transducer *optional( Transducer *t )
951
-
952
- {
953
- Transducer *nt = &(t->copy());
954
- nt->root_node()->set_final(1);
955
- delete t;
956
- return nt;
957
- }
958
-
959
-
960
- /*******************************************************************/
961
- /* */
962
- /* add_pi_transitions */
963
- /* */
964
- /*******************************************************************/
965
-
966
- static void add_pi_transitions( Transducer *t, Node *node, Alphabet &alph )
967
-
968
- {
969
- for( Alphabet::const_iterator it=alph.begin(); it!=alph.end(); it++)
970
- node->add_arc( *it, node, t );
971
- }
972
-
973
-
974
- /*******************************************************************/
975
- /* */
976
- /* pi_machine */
977
- /* */
978
- /*******************************************************************/
979
-
980
- static Transducer *pi_machine( Alphabet &alph )
981
-
982
- {
983
- Transducer *t=new Transducer();
984
- t->root_node()->set_final(1);
985
- add_pi_transitions( t, t->root_node(), alph );
986
- return t;
987
- }
988
-
989
-
990
- /*******************************************************************/
991
- /* */
992
- /* cp */
993
- /* */
994
- /*******************************************************************/
995
-
996
- static Transducer *cp( Range *lower_range, Range *upper_range )
997
-
998
- {
999
- return make_transducer(lower_range, upper_range);
1000
- }
1001
-
1002
-
1003
- /*******************************************************************/
1004
- /* */
1005
- /* anti_cp */
1006
- /* */
1007
- /*******************************************************************/
1008
-
1009
- static Transducer *anti_cp( Range *lower_range, Range *upper_range )
1010
-
1011
- {
1012
- Transducer *cpt = cp(lower_range, upper_range);
1013
- Transducer *t=new Transducer();
1014
- Node *node=t->new_node();
1015
-
1016
- node->set_final(1);
1017
- for(Alphabet::const_iterator it=TheAlphabet.begin();
1018
- it!=TheAlphabet.end(); it++){
1019
- Label l=*it;
1020
- if (in_range(l.lower_char(), lower_range) &&
1021
- !cpt->root_node()->target_node(l))
1022
- t->root_node()->add_arc( l, node, t );
1023
- }
1024
- if (in_range(Label::epsilon, lower_range) &&
1025
- !cpt->root_node()->target_node(Label()))
1026
- t->root_node()->add_arc( Label(), node, t );
1027
-
1028
- delete cpt;
1029
- return t;
1030
- }
1031
-
1032
-
1033
- /*******************************************************************/
1034
- /* */
1035
- /* twol_right_rule */
1036
- /* */
1037
- /*******************************************************************/
1038
-
1039
- static Transducer *twol_right_rule( Transducer *lc, Range *lower_range,
1040
- Range *upper_range, Transducer *rc )
1041
-
1042
- {
1043
- // Build the rule transducer
1044
- Transducer *cpt = cp(lower_range, upper_range);
1045
- Transducer *pi=pi_machine(TheAlphabet);
1046
-
1047
- // First unwanted language
1048
-
1049
- lc->alphabet.copy(TheAlphabet);
1050
- Transducer *notlc = &(!*lc);
1051
- Transducer *tmp = &(*notlc + *cpt);
1052
- delete notlc;
1053
- Transducer *t1 = &(*tmp + *pi);
1054
- delete tmp;
1055
-
1056
- // Second unwanted language
1057
- rc->alphabet.copy(TheAlphabet);
1058
- Transducer *notrc = &(!*rc);
1059
- tmp = &(*cpt + *notrc);
1060
- delete cpt;
1061
- delete notrc;
1062
- Transducer *t2 = &(*pi + *tmp);
1063
- delete pi;
1064
- delete tmp;
1065
-
1066
- tmp = &(*t1|*t2);
1067
- delete t1;
1068
- delete t2;
1069
-
1070
- tmp->alphabet.copy(TheAlphabet);
1071
- t1 = &(!*tmp);
1072
- delete tmp;
1073
-
1074
- return t1;
1075
- }
1076
-
1077
-
1078
- /*******************************************************************/
1079
- /* */
1080
- /* twol_left_rule */
1081
- /* */
1082
- /*******************************************************************/
1083
-
1084
- static Transducer *twol_left_rule( Transducer *lc, Range *lower_range,
1085
- Range *upper_range, Transducer *rc )
1086
-
1087
- {
1088
- // check for problematic insertion operations like "$L <> <= a $R"
1089
- // where either $L or $R includes the empty string
1090
- if (in_range(Label::epsilon, lower_range)) {
1091
- if (lc->generates_empty_string())
1092
- error("in two level rule: insertion operation with deletable left context!");
1093
- if (rc->generates_empty_string())
1094
- error("in two level rule: insertion operation with deletable right context!");
1095
- cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
1096
- }
1097
-
1098
- // Build the rule transducer
1099
- Transducer *t1 = anti_cp(lower_range, upper_range);
1100
-
1101
- // Add the left context;
1102
- Transducer *t2 = &(*lc + *t1);
1103
- delete t1;
1104
-
1105
- // Add the right context;
1106
- t1 = &(*t2 + *rc);
1107
- delete t2;
1108
-
1109
- // Form the complement
1110
- t1->alphabet.copy(TheAlphabet);
1111
- t2 = &(!*t1);
1112
- delete t1;
1113
-
1114
- return t2;
1115
- }
1116
-
1117
-
1118
- /*******************************************************************/
1119
- /* */
1120
- /* make_rule */
1121
- /* */
1122
- /*******************************************************************/
1123
-
1124
- Transducer *make_rule( Transducer *lc, Range *lower_range, Twol_Type type,
1125
- Range *upper_range, Transducer *rc )
1126
-
1127
- {
1128
- if (RS.size() > 0 || RSS.size() > 0)
1129
- cerr << "\nWarning: agreement operation inside of replacement rule!\n";
1130
-
1131
- if (!Alphabet_Defined)
1132
- error("Two level rules require the definition of an alphabet");
1133
-
1134
- // expand the left and the right contexts to their full length
1135
- Transducer *pi=pi_machine(TheAlphabet);
1136
-
1137
- if (lc == NULL)
1138
- lc = pi_machine(TheAlphabet);
1139
- else {
1140
- Transducer *tmp = &(*pi + *lc);
1141
- delete lc;
1142
- lc = tmp;
1143
- }
1144
- if (rc == NULL)
1145
- rc = pi_machine(TheAlphabet);
1146
- else {
1147
- Transducer *tmp = &(*rc + *pi);
1148
- delete rc;
1149
- rc = tmp;
1150
- }
1151
- delete pi;
1152
-
1153
- Transducer *result = NULL;
1154
-
1155
- switch (type) {
1156
- case twol_left:
1157
- result = twol_left_rule(lc, lower_range, upper_range, rc);
1158
- break;
1159
- case twol_right:
1160
- result = twol_right_rule(lc, lower_range, upper_range, rc);
1161
- break;
1162
- case twol_both:
1163
- {
1164
- Transducer *t1 = twol_left_rule(lc, lower_range, upper_range, rc);
1165
- Transducer *t2 = twol_right_rule(lc, lower_range, upper_range, rc);
1166
- result = &(*t1 & *t2);
1167
- delete t1;
1168
- delete t2;
1169
- }
1170
- }
1171
- delete lc;
1172
- delete rc;
1173
- if (lower_range != upper_range)
1174
- free_values(lower_range);
1175
- free_values(upper_range);
1176
-
1177
- return minimise(result);
1178
- }
1179
-
1180
-
1181
- /*******************************************************************/
1182
- /* */
1183
- /* make_context */
1184
- /* */
1185
- /*******************************************************************/
1186
-
1187
- Contexts *make_context( Transducer *l, Transducer *r )
1188
-
1189
- {
1190
- if (l == NULL)
1191
- l = empty_transducer();
1192
- if (r == NULL)
1193
- r = empty_transducer();
1194
-
1195
- Contexts *c=new Contexts();
1196
- c->left = l;
1197
- c->right = r;
1198
- c->next = NULL;
1199
-
1200
- return c;
1201
- }
1202
-
1203
-
1204
- /*******************************************************************/
1205
- /* */
1206
- /* add_context */
1207
- /* */
1208
- /*******************************************************************/
1209
-
1210
- Contexts *add_context( Contexts *nc, Contexts *c )
1211
-
1212
- {
1213
- nc->next = c;
1214
- return nc;
1215
- }
1216
-
1217
-
1218
- /*******************************************************************/
1219
- /* */
1220
- /* restriction_transducer */
1221
- /* */
1222
- /*******************************************************************/
1223
-
1224
- static Transducer *restriction_transducer( Transducer *l1, Transducer *l2,
1225
- Character marker )
1226
-
1227
- {
1228
- l1->alphabet.copy(TheAlphabet);
1229
- Transducer *t1 = &(*l1 / *l2);
1230
-
1231
- Transducer *t2 = &t1->replace_char(marker, Label::epsilon);
1232
- delete t1;
1233
-
1234
- t2->alphabet.copy(TheAlphabet);
1235
- t1 = &(!*t2);
1236
- delete t2;
1237
-
1238
- return t1;
1239
- }
1240
-
1241
-
1242
- /*******************************************************************/
1243
- /* */
1244
- /* marker_transducer */
1245
- /* */
1246
- /*******************************************************************/
1247
-
1248
- static Transducer *marker_transducer( Transducer *t, Contexts *c,
1249
- Character &marker )
1250
- {
1251
- marker = TheAlphabet.new_marker();
1252
- Transducer *result = one_label_transducer( Label(marker) );
1253
-
1254
- // build the alphabet with a new marker
1255
- result->alphabet.insert_symbols(t->alphabet);
1256
- while (c) {
1257
- result->alphabet.insert_symbols(c->left->alphabet);
1258
- result->alphabet.insert_symbols(c->right->alphabet);
1259
- c = c->next;
1260
- }
1261
-
1262
- return result;
1263
- }
1264
-
1265
-
1266
- /*******************************************************************/
1267
- /* */
1268
- /* center_transducer */
1269
- /* */
1270
- /*******************************************************************/
1271
-
1272
- static Transducer *center_transducer( Transducer *t, Transducer *pi,
1273
- Transducer *mt )
1274
- {
1275
- // create the concatenation pi + mt + *t + mt + pi
1276
- Transducer *t1=&(*pi + *mt);
1277
- Transducer *t2=&(*t1 + *t);
1278
- delete t1;
1279
- t1 = &(*t2 + *mt);
1280
- delete t2;
1281
- t2 = &(*t1 + *pi);
1282
- delete t1;
1283
- return t2;
1284
- }
1285
-
1286
-
1287
- /*******************************************************************/
1288
- /* */
1289
- /* context_transducer */
1290
- /* */
1291
- /*******************************************************************/
1292
-
1293
- static Transducer *context_transducer( Transducer *t, Transducer *pi,
1294
- Transducer *mt, Contexts *c )
1295
- {
1296
- // pi + left[i] + mt + pi + mt + right[i] + pi
1297
-
1298
- Transducer *t1 = &(*mt + *t);
1299
- Transducer *tmp = &(*t1 + *mt);
1300
- delete t1;
1301
- Transducer *result=NULL;
1302
-
1303
- while (c) {
1304
- t1 = &(*pi + *c->left);
1305
- Transducer *t2 = &(*t1 + *tmp);
1306
- delete t1;
1307
- t1 = &(*t2 + *c->right);
1308
- delete t2;
1309
- t2 = &(*t1 + *pi);
1310
- delete t1;
1311
-
1312
- if (result) {
1313
- t1 = &(*result | *t2);
1314
- delete t2;
1315
- result = t1;
1316
- }
1317
- else
1318
- result = t2;
1319
-
1320
- c = c->next;
1321
- }
1322
- delete tmp;
1323
-
1324
- return result;
1325
- }
1326
-
1327
-
1328
-
1329
- /*******************************************************************/
1330
- /* */
1331
- /* result_transducer */
1332
- /* */
1333
- /*******************************************************************/
1334
-
1335
- static Transducer *result_transducer( Transducer *l1, Transducer *l2,
1336
- Twol_Type type, Character marker )
1337
- {
1338
- Transducer *result=NULL;
1339
- if (type == twol_right)
1340
- result = restriction_transducer( l1, l2, marker );
1341
- else if (type == twol_left)
1342
- result = restriction_transducer( l2, l1, marker );
1343
- else if (type == twol_both) {
1344
- Transducer *t1 = restriction_transducer( l1, l2, marker );
1345
- Transducer *t2 = restriction_transducer( l2, l1, marker );
1346
- result = &(*t1 & *t2);
1347
- delete t1;
1348
- delete t2;
1349
- }
1350
-
1351
- return result;
1352
- }
1353
-
1354
-
1355
- /*******************************************************************/
1356
- /* */
1357
- /* restriction */
1358
- /* */
1359
- /*******************************************************************/
1360
-
1361
- Transducer *restriction( Transducer *t, Twol_Type type, Contexts *c,
1362
- int direction )
1363
- {
1364
- Character marker;
1365
- Transducer *mt=marker_transducer( t, c, marker );
1366
- Transducer *pi=pi_machine(TheAlphabet);
1367
- Transducer *l1=center_transducer( t, pi, mt );
1368
-
1369
- Transducer *tmp;
1370
- if (direction == 0)
1371
- tmp = pi;
1372
- else if (direction == 1) {
1373
- // compute _t || .*
1374
- Transducer *t1 = &t->lower_level();
1375
- tmp = &(*t1 || *pi);
1376
- delete t1;
1377
- }
1378
- else {
1379
- // compute ^t || .*
1380
- Transducer *t1 = &t->upper_level();
1381
- tmp = &(*pi || *t1);
1382
- delete t1;
1383
- }
1384
- delete t;
1385
-
1386
- Transducer *l2=context_transducer( tmp, pi, mt, c );
1387
- if (tmp != pi)
1388
- delete tmp;
1389
- delete pi;
1390
- delete mt;
1391
-
1392
- Transducer *result=result_transducer( l1, l2, type, marker );
1393
- delete l1;
1394
- delete l2;
1395
-
1396
- free_contexts( c );
1397
-
1398
- return result;
1399
- }
1400
-
1401
-
1402
- /*******************************************************************/
1403
- /* */
1404
- /* constrain_boundary_transducer */
1405
- /* */
1406
- /*******************************************************************/
1407
-
1408
- static Transducer *constrain_boundary_transducer( Character leftm,
1409
- Character rightm )
1410
- {
1411
- // create the transducer (.|<L>|<R>)*
1412
-
1413
- Transducer *tmp=pi_machine(TheAlphabet);
1414
-
1415
- // create the transducer (.|<L>|<R>)* <L><R> (.|<L>|<R>)*
1416
- Node *root = tmp->root_node();
1417
- Node *node = tmp->new_node();
1418
- Node *last = tmp->new_node();
1419
-
1420
- root->set_final(0);
1421
- last->set_final(1);
1422
-
1423
- root->add_arc( Label(leftm), node, tmp);
1424
- node->add_arc( Label(rightm), last, tmp);
1425
-
1426
- add_pi_transitions( tmp, last, TheAlphabet );
1427
-
1428
- // create the transducer !((.|<L>|<R>)* <L><R> (.|<L>|<R>)*)
1429
- tmp->alphabet.copy(TheAlphabet);
1430
- Transducer *result = &(!*tmp);
1431
- delete tmp;
1432
-
1433
- return result;
1434
- }
1435
-
1436
-
1437
- /*******************************************************************/
1438
- /* */
1439
- /* extended_left_transducer */
1440
- /* */
1441
- /*******************************************************************/
1442
-
1443
- static Transducer *extended_left_transducer( Transducer *t,
1444
- Character m1, Character m2 )
1445
- {
1446
- if (t == NULL) // empty context
1447
- return pi_machine(TheAlphabet);
1448
-
1449
- // Extended left context transducer
1450
-
1451
- // <R> >> (<L> >> $T$)
1452
- Transducer *tmp=&t->freely_insert( Label(m1) );
1453
- delete t;
1454
- t = &tmp->freely_insert( Label(m2) );
1455
- delete tmp;
1456
-
1457
- // .* (<R> >> (<L> >> $T$))
1458
- add_pi_transitions( t, t->root_node(), TheAlphabet );
1459
-
1460
- // !(.*<L>)
1461
- tmp = one_label_transducer(Label(m1));
1462
- add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1463
- tmp->alphabet.copy(TheAlphabet);
1464
- Transducer *t2 = &(!*tmp);
1465
- delete tmp;
1466
-
1467
- // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1468
- tmp = &(*t || *t2);
1469
- delete t;
1470
- delete t2;
1471
-
1472
- return tmp;
1473
- }
1474
-
1475
-
1476
- /*******************************************************************/
1477
- /* */
1478
- /* left_context */
1479
- /* */
1480
- /*******************************************************************/
1481
-
1482
- static Transducer *left_context( Transducer *t, Character m1, Character m2 )
1483
-
1484
- {
1485
- // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1486
- Transducer *ct = extended_left_transducer(t, m1, m2);
1487
-
1488
- // <R>* <L> .*
1489
- Transducer *mt = one_label_transducer(Label(m1));
1490
- mt->root_node()->add_arc(Label(m2), mt->root_node(), mt );
1491
- add_pi_transitions(mt, mt->root_node()->target_node(Label(m1)),TheAlphabet);
1492
-
1493
- ct->alphabet.copy(TheAlphabet);
1494
- Transducer *no_ct = &!*ct;
1495
-
1496
- mt->alphabet.copy(TheAlphabet);
1497
- Transducer *no_mt = &!*mt;
1498
-
1499
- {
1500
- static int print=1;
1501
- if (print) {
1502
- print = 0;
1503
- Transducer *temp = &(ct->copy());
1504
- temp = &(no_ct->copy());
1505
- temp = &(mt->copy());
1506
- temp = &(no_mt->copy());
1507
- }
1508
- }
1509
-
1510
- Transducer *t1 = &(*no_ct + *mt);
1511
- delete no_ct;
1512
- delete mt;
1513
-
1514
- Transducer *t2 = &(*ct + *no_mt);
1515
- delete ct;
1516
- delete no_mt;
1517
-
1518
- Transducer *tmp = &(*t1 | *t2);
1519
- delete t1;
1520
- delete t2;
1521
-
1522
- tmp->alphabet.copy(TheAlphabet);
1523
- t1 = &!*tmp;
1524
- delete tmp;
1525
-
1526
- return t1;
1527
- }
1528
-
1529
-
1530
- /*******************************************************************/
1531
- /* */
1532
- /* make_optional */
1533
- /* */
1534
- /*******************************************************************/
1535
-
1536
- static Transducer *make_optional( Transducer *t )
1537
-
1538
- {
1539
- Transducer *t1 = pi_machine(TheAlphabet);
1540
- Transducer *t2 = &(*t | *t1);
1541
- delete t;
1542
- delete t1;
1543
- return t2;
1544
- }
1545
-
1546
-
1547
- /*******************************************************************/
1548
- /* */
1549
- /* replace */
1550
- /* */
1551
- /*******************************************************************/
1552
-
1553
- Transducer *replace( Transducer *ct, Repl_Type type, bool optional )
1554
-
1555
- {
1556
- // compute the no-center transducer
1557
- Transducer *tmp;
1558
-
1559
- if (type == repl_up)
1560
- // _ct
1561
- tmp = &ct->lower_level();
1562
- else if (type == repl_down)
1563
- // ^ct
1564
- tmp = &ct->upper_level();
1565
- else
1566
- error("Invalid type of replace operator");
1567
-
1568
- // .* _ct
1569
- add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1570
-
1571
- // .* _ct .*
1572
- Transducer *t2 = pi_machine(TheAlphabet);
1573
- Transducer *t3 = &(*tmp + *t2);
1574
- delete tmp;
1575
- delete t2;
1576
-
1577
- // no_ct = !(.* _ct .*)
1578
- t3->alphabet.copy(TheAlphabet);
1579
- Transducer *no_ct = &(!*t3);
1580
- delete t3;
1581
-
1582
- // compute the unconditional replacement transducer
1583
-
1584
- // no-ct ct
1585
- tmp = &(*no_ct + *ct);
1586
- delete ct;
1587
-
1588
- // (no-ct ct)*
1589
- t2 = &(tmp->kleene_star());
1590
- delete tmp;
1591
-
1592
- // (no-ct ct)* no-ct
1593
- tmp = &(*t2 + *no_ct);
1594
- delete t2;
1595
- delete no_ct;
1596
-
1597
- if (optional)
1598
- tmp = make_optional(tmp);
1599
-
1600
- return tmp;
1601
- }
1602
-
1603
-
1604
- /*******************************************************************/
1605
- /* */
1606
- /* replace_transducer */
1607
- /* */
1608
- /*******************************************************************/
1609
-
1610
- static Transducer *replace_transducer( Transducer *ct, Character lm,
1611
- Character rm, Repl_Type type )
1612
- {
1613
- // insert boundary markers into the center transducer
1614
-
1615
- // <L> >> (<R> >> $Center$)
1616
- Transducer *tmp = &ct->freely_insert(Label(lm));
1617
- delete ct;
1618
- ct = &tmp->freely_insert(Label(rm));
1619
- delete tmp;
1620
-
1621
- // add surrounding boundary markers to the center transducer
1622
-
1623
- // <L> (<L> >> (<R> >> $Center$))
1624
- Transducer *t2 = one_label_transducer( Label(lm) );
1625
- tmp = &(*t2 + *ct);
1626
- delete t2;
1627
- delete ct;
1628
-
1629
- // $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
1630
- t2 = one_label_transducer( Label(rm) );
1631
- ct = &(*tmp + *t2);
1632
- delete tmp;
1633
- delete t2;
1634
-
1635
- return replace(ct, type, false);
1636
- }
1637
-
1638
-
1639
- /*******************************************************************/
1640
- /* */
1641
- /* replace_in_context */
1642
- /* */
1643
- /*******************************************************************/
1644
-
1645
- Transducer *replace_in_context( Transducer *t, Repl_Type type, Contexts *c,
1646
- bool optional )
1647
- {
1648
- // The implementation of the replace operators is based on
1649
- // "The Replace Operator" by Lauri Karttunen
1650
-
1651
- if (!Alphabet_Defined)
1652
- error("The replace operators require the definition of an alphabet");
1653
-
1654
- if (!c->left->is_automaton() || !c->right->is_automaton())
1655
- error("The replace operators require automata as context expressions!");
1656
-
1657
- // create the marker symbols
1658
- Character leftm = TheAlphabet.new_marker();
1659
- Character rightm = TheAlphabet.new_marker();
1660
-
1661
- /////////////////////////////////////////////////////////////
1662
- // Create the insert boundaries transducer (.|<>:<L>|<>:<R>)*
1663
- /////////////////////////////////////////////////////////////
1664
-
1665
- Transducer *ibt=pi_machine(TheAlphabet);
1666
- Node *root=ibt->root_node();
1667
- root->add_arc( Label(Label::epsilon, leftm), root, ibt);
1668
- root->add_arc( Label(Label::epsilon, rightm),root, ibt);
1669
-
1670
- /////////////////////////////////////////////////////////////
1671
- // Create the remove boundaries transducer (.|<L>:<>|<R>:<>)*
1672
- /////////////////////////////////////////////////////////////
1673
-
1674
- Transducer *rbt=pi_machine(TheAlphabet);
1675
- root = rbt->root_node();
1676
- root->add_arc( Label(leftm, Label::epsilon), root, rbt);
1677
- root->add_arc( Label(rightm,Label::epsilon), root, rbt);
1678
-
1679
- // Add the markers to the alphabet
1680
- TheAlphabet.insert(Label(leftm));
1681
- TheAlphabet.insert(Label(rightm));
1682
-
1683
- /////////////////////////////////////////////////////////////
1684
- // Create the constrain boundaries transducer !(.*<L><R>.*)
1685
- /////////////////////////////////////////////////////////////
1686
-
1687
- Transducer *cbt=constrain_boundary_transducer(leftm, rightm);
1688
-
1689
- /////////////////////////////////////////////////////////////
1690
- // Create the extended context transducers
1691
- /////////////////////////////////////////////////////////////
1692
-
1693
- // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1694
- Transducer *lct = left_context(c->left, leftm, rightm);
1695
-
1696
- // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1697
- Transducer *tmp = &c->right->reverse();
1698
- delete c->right;
1699
- Transducer *t2 = left_context(tmp, rightm, leftm);
1700
- Transducer *rct = &t2->reverse();
1701
- delete t2;
1702
-
1703
- /////////////////////////////////////////////////////////////
1704
- // unconditional replace transducer
1705
- /////////////////////////////////////////////////////////////
1706
-
1707
- Transducer *rt;
1708
- if (type == repl_up || type == repl_right || type == repl_left)
1709
- rt = replace_transducer( t, leftm, rightm, repl_up );
1710
- else
1711
- rt = replace_transducer( t, leftm, rightm, repl_down );
1712
-
1713
- /////////////////////////////////////////////////////////////
1714
- // build the conditional replacement transducer
1715
- /////////////////////////////////////////////////////////////
1716
-
1717
- tmp = &(ibt->copy());
1718
- tmp = &(cbt->copy());
1719
- tmp = &(lct->copy());
1720
- tmp = &(rct->copy());
1721
- tmp = &(rt->copy());
1722
- tmp = &(rbt->copy());
1723
-
1724
- tmp = ibt;
1725
- tmp = &(*ibt || *cbt);
1726
- delete(ibt);
1727
- delete(cbt);
1728
-
1729
- if (type == repl_up || type == repl_left) {
1730
- t2 = &(*tmp || *lct);
1731
- delete tmp;
1732
- delete lct;
1733
- tmp = t2;
1734
- }
1735
- if (type == repl_up || type == repl_right) {
1736
- t2 = &(*tmp || *rct);
1737
- delete tmp;
1738
- delete rct;
1739
- tmp = t2;
1740
- }
1741
-
1742
- t2 = &(*tmp || *rt);
1743
- delete tmp;
1744
- delete rt;
1745
- tmp = t2;
1746
-
1747
- if (type == repl_down || type == repl_right) {
1748
- t2 = &(*tmp || *lct);
1749
- delete tmp;
1750
- delete lct;
1751
- tmp = t2;
1752
- }
1753
- if (type == repl_down || type == repl_left) {
1754
- t2 = &(*tmp || *rct);
1755
- delete tmp;
1756
- delete rct;
1757
- tmp = t2;
1758
- }
1759
-
1760
- t2 = &(*tmp || *rbt);
1761
- delete tmp;
1762
- delete rbt;
1763
-
1764
- // Remove the markers from the alphabet
1765
- TheAlphabet.delete_markers();
1766
-
1767
- if (optional)
1768
- t2 = make_optional(t2);
1769
-
1770
- free_contexts( c );
1771
-
1772
- return t2;
1773
- }
1774
-
1775
-
1776
- /*******************************************************************/
1777
- /* */
1778
- /* add_alphabet */
1779
- /* */
1780
- /*******************************************************************/
1781
-
1782
- void add_alphabet( Transducer *t )
1783
-
1784
- {
1785
- t->alphabet.copy(TheAlphabet);
1786
- t->complete_alphabet();
1787
- }
1788
-
1789
-
1790
- /*******************************************************************/
1791
- /* */
1792
- /* write_to_file */
1793
- /* */
1794
- /*******************************************************************/
1795
-
1796
- void write_to_file( Transducer *t, char *filename)
1797
-
1798
- {
1799
- FILE *file;
1800
- if ((file = fopen(filename,"wb")) == NULL) {
1801
- fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
1802
- exit(1);
1803
- }
1804
- free( filename );
1805
-
1806
- t = explode(t);
1807
- add_alphabet(t);
1808
- t = minimise(t);
1809
- t->store(file);
1810
- fclose(file);
1811
- }
1812
-
1813
-
1814
- /*******************************************************************/
1815
- /* */
1816
- /* result */
1817
- /* */
1818
- /*******************************************************************/
1819
-
1820
- Transducer *result( Transducer *t, bool switch_flag )
1821
-
1822
- {
1823
- t = explode(t);
1824
-
1825
- // delete the variable values
1826
- vector<char*> s;
1827
- for( VarMap::iterator it=VM.begin(); it != VM.end(); it++ ) {
1828
- s.push_back(it->first);
1829
- delete it->second;
1830
- it->second = NULL;
1831
- }
1832
- VM.clear();
1833
- for( size_t i=0; i<s.size(); i++ )
1834
- free(s[i]);
1835
- s.clear();
1836
-
1837
- if (switch_flag)
1838
- t = switch_levels(t);
1839
- add_alphabet(t);
1840
- t = minimise(t);
1841
- return t;
1842
- }