ruby-sfst 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -1,369 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE fst.h */
4
- /* MODULE fst */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /* PURPOSE finite state tools */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _FST_H_
13
- #define _FST_H_
14
-
15
- #include "alphabet.h"
16
-
17
-
18
- /*******************************************************************/
19
- /* include commands */
20
- /*******************************************************************/
21
-
22
- #include <string>
23
-
24
- #include <vector>
25
-
26
- #include "mem.h"
27
-
28
- typedef unsigned short VType; // type of the visit marks kept in Node::visited and Transducer::vmark
29
-
30
- extern int Quiet; // defined in another translation unit; NOTE(review): presumably silences progress output - confirm
31
-
32
- class Node; // forward declarations: the classes below refer to one another
33
- class Arc;
34
- class Arcs;
35
- class Transducer;
36
-
37
-
38
- struct hashf {
39
- size_t operator()(const Node *n) const { return (size_t) n; }
40
- };
41
- struct equalf {
42
- int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
43
- };
44
- typedef hash_set<Node*, hashf, equalf> NodeHashSet; // set of nodes, hashed and compared by address
45
-
46
-
47
- /***************** class Arc *************************************/
48
-
49
- class Arc {
50
-
51
- private:
52
- Label l;
53
- Node *target;
54
- Arc *next;
55
-
56
- public:
57
- void init( Label ll, Node *node ) { l=ll; target=node; };
58
- Label label( void ) const { return l; };
59
- Node *target_node( void ) { return target; };
60
- const Node *target_node( void ) const { return target; };
61
-
62
- friend class Arcs;
63
- friend class ArcsIter;
64
- };
65
-
66
-
67
- /***************** class Arcs ************************************/
68
-
69
- class Arcs {
70
-
71
- private:
72
- Arc *first_arcp;
73
- Arc *first_epsilon_arcp;
74
-
75
- public:
76
- void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
77
- Arcs( void ) { init(); };
78
- Node *target_node( Label l );
79
- const Node *target_node( Label l ) const;
80
- void add_arc( Label, Node*, Transducer* );
81
- int remove_arc( Arc* );
82
- bool is_empty( void ) const { return !(first_arcp || first_epsilon_arcp); };
83
- bool epsilon_transition_exists( void ) const { return first_epsilon_arcp != NULL; };
84
- bool non_epsilon_transition_exists( void ) const { return first_arcp != NULL; };
85
- int size( void ) const;
86
-
87
- friend class ArcsIter;
88
- };
89
-
90
-
91
- /***************** class ArcsIter ********************************/
92
-
93
- class ArcsIter {
94
-
95
- // ArcsIter iterates over the arcs starting with epsilon arcs
96
-
97
- private:
98
- Arc *current_arcp;
99
- Arc *more_arcs;
100
-
101
- public:
102
- typedef enum {all,non_eps,eps} IterType;
103
-
104
- ArcsIter( const Arcs *arcs, IterType type=all ) {
105
- more_arcs = NULL;
106
- if (type == all) {
107
- if (arcs->first_epsilon_arcp) {
108
- current_arcp = arcs->first_epsilon_arcp;
109
- more_arcs = arcs->first_arcp;
110
- }
111
- else
112
- current_arcp = arcs->first_arcp;
113
- }
114
- else if (type == non_eps)
115
- current_arcp = arcs->first_arcp;
116
- else
117
- current_arcp = arcs->first_epsilon_arcp;
118
- };
119
-
120
- void operator++( int ) {
121
- if (current_arcp) {
122
- current_arcp = current_arcp->next;
123
- if (!current_arcp && more_arcs) {
124
- current_arcp = more_arcs;
125
- more_arcs = NULL;
126
- }
127
- }
128
- };
129
- operator Arc*( void ) { return current_arcp; };
130
-
131
- };
132
-
133
-
134
- /***************** class Node ************************************/
135
-
136
- class Node {
137
-
138
- private:
139
- bool final;
140
- VType visited;
141
- Arcs arcsp;
142
- Node *forwardp;
143
-
144
- public:
145
- Node( void ) { init(); };
146
- void init( void );
147
- bool is_final( void ) const { return final; };
148
- void set_final( bool flag ) { final = flag; };
149
- void set_forward( Node *node ) { forwardp = node; };
150
- const Node *target_node( Label l ) const { return arcs()->target_node(l); };
151
- Node *target_node( Label l ) { return arcs()->target_node(l); };
152
- void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
153
- Arcs *arcs( void ) { return &arcsp; };
154
- const Arcs *arcs( void ) const { return &arcsp; };
155
- Node *forward( void ) { return forwardp; };
156
- void clear_visited( NodeHashSet &nodeset );
157
- bool was_visited( VType vmark ) {
158
- if (visited == vmark)
159
- return true;
160
- visited = vmark;
161
- return false;
162
- };
163
- bool check_visited( VType vm ) // leaves the visited flag unchanged
164
- { return (visited==vm); };
165
- };
166
-
167
-
168
- /***************** class Node2Int *********************************/
169
-
170
- class Node2Int {
171
-
172
- struct hashf {
173
- size_t operator()(const Node *node) const {
174
- return (size_t)node;
175
- }
176
- };
177
- struct equalf {
178
- int operator()(const Node *n1, const Node *n2) const {
179
- return (n1 == n2);
180
- }
181
- };
182
- typedef hash_map<Node*, int, hashf, equalf> NL;
183
-
184
- private:
185
- int current_number;
186
- NL number;
187
-
188
- public:
189
- int &operator[]( Node *node ) {
190
- NL::iterator it=number.find(node);
191
- if (it == number.end())
192
- return number.insert(NL::value_type(node, 0)).first->second;
193
- return it->second;
194
- };
195
- };
196
-
197
-
198
- /***************** class NodeNumbering ****************************/
199
-
200
- class NodeNumbering {
201
-
202
- private:
203
- std::vector<Node*> nodes;
204
- Node2Int nummap;
205
- void number_node( Node*, Transducer& );
206
-
207
- public:
208
- NodeNumbering( Transducer& );
209
- int operator[]( Node *node ) { return nummap[node]; };
210
- size_t number_of_nodes( void ) { return nodes.size(); };
211
- Node *get_node( size_t n ) { return nodes[n]; };
212
- };
213
-
214
-
215
- /***************** class PairMapping ****************************/
216
-
217
- class PairMapping {
218
- // This class is used to map a node pair from two transducers
219
- // to a single node in another transducer
220
-
221
- typedef std::pair<Node*, Node*> NodePair;
222
-
223
- private:
224
- struct hashf {
225
- size_t operator()(const NodePair p) const {
226
- return (size_t)p.first ^ (size_t)p.second;
227
- }
228
- };
229
- struct equalf {
230
- int operator()(const NodePair p1, const NodePair p2) const {
231
- return (p1.first==p2.first && p1.second == p2.second);
232
- }
233
- };
234
- typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
235
- PairMap pm;
236
-
237
- public:
238
- typedef PairMap::iterator iterator;
239
- iterator begin( void ) { return pm.begin(); };
240
- iterator end( void ) { return pm.end(); };
241
- iterator find( Node *n1, Node *n2 )
242
- { return pm.find( NodePair(n1,n2) ); };
243
- Node* &operator[]( NodePair p ) { return pm.operator[](p); };
244
-
245
- };
246
-
247
-
248
- /***************** class Transducer *******************************/
249
-
250
- class Transducer {
251
-
252
- private:
253
- bool deterministic;
254
- bool minimised;
255
- Node root;
256
- Mem mem;
257
-
258
- typedef std::set<Label, Label::label_cmp> LabelSet;
259
- typedef hash_map<Character, char*> SymbolMap;
260
-
261
- void incr_vmark( void ) {
262
- if (++vmark == 0) {
263
- NodeHashSet nodes;
264
- root.clear_visited( nodes );
265
- fprintf(stderr,"clearing flags\n");
266
- vmark = 1;
267
- }
268
- };
269
- void reverse_node( Node *old_node, Transducer *new_node );
270
- Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
271
- Node *copy_nodes( Node *n, Transducer *a,
272
- bool lswitch=false, bool recode=false );
273
- void rec_cat_nodes( Node*, Node* );
274
- bool productive_node( Node* );
275
- bool prune_nodes( Node* );
276
- void negate_nodes( Node*, Node* );
277
- bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
278
- void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
279
- void freely_insert_at_node( Node *node, Label l );
280
- int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
281
- bool infinitely_ambiguous_node( Node* );
282
- bool is_cyclic_node( Node*, NodeHashSet &visited );
283
- bool is_automaton_node( Node* );
284
- bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
285
- void store_symbols( Node*, SymbolMap&, LabelSet& );
286
-
287
- void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
288
- void splice_arc( Node*, Node*, Node*, Transducer* );
289
- void enumerate_paths_node( Node*, std::vector<Label>&, NodeHashSet&,
290
- std::vector<Transducer*>& );
291
- void replace_char2( Node*, Node*, Character, Character, Transducer* );
292
- Node *create_node( std::vector<Node*>&, char*, size_t line );
293
- void read_transducer_binary( FILE* );
294
- void read_transducer_text( FILE* );
295
-
296
- public:
297
- VType vmark;
298
- Alphabet alphabet; // The set of all labels, i.e. character pairs
299
-
300
- Transducer( void ) : root(), mem()
301
- { vmark = 0; deterministic = minimised = false; };
302
- // convertion of a string to an transducer
303
- Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
304
- // reads a word list from a file and stores it in the transducer
305
- Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
306
- // reads a transducer from a binary or text file
307
- Transducer( FILE*, bool binary=true );
308
- // turns a sequence of labels into a transducer
309
- Transducer( std::vector<Label>& );
310
-
311
- Node *root_node( void ) { return &root; }; // returns the root node
312
- const Node *root_node( void ) const { return &root; }; // returns the root node
313
- Node *new_node( void ); // memory alocation for a new node
314
- Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
315
- void add_string( char *s, bool extended=false, Alphabet *a=NULL );
316
- void complete_alphabet( void );
317
- void minimise_alphabet( void );
318
- void prune( void ); // remove unnecessary arcs
319
-
320
- int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
321
-
322
- bool analyze_string( char *s, FILE *file, bool with_brackets=true );
323
- bool generate_string( char *s, FILE *file, bool with_brackets=true );
324
- bool generate( FILE *file, bool separate=false );
325
-
326
- void clear( void ); // clears the transducer. The resulting transducer
327
- // is like one created with Transducer()
328
- // copy duplicates an transducer
329
- // if called with a non-zero argument, upper and lower level are switched
330
- Transducer &copy( bool lswitch=false, const Alphabet *al=NULL );
331
- Transducer &switch_levels( void ) { return copy( true ); };
332
- Transducer &splice( Label l, Transducer *a);
333
- Transducer &freely_insert( Label l );
334
- Transducer &replace_char( Character c, Character nc );
335
- Transducer &level( Level );
336
- Transducer &lower_level( void ) // creates an transducer for the "lower" language
337
- { return level(lower); };
338
- Transducer &upper_level( void ) // creates an transducer for the "upper" language
339
- { return level(upper); };
340
- Transducer &determinise( void ); // creates a deterministic transducer
341
- Transducer &minimise( bool verbose=true ); // creates a minimised transducer
342
- void store( FILE* ); // stores the transducer in binary format
343
- void store_lowmem( FILE* );
344
- void read( FILE* ); // reads an transducer in binary format
345
- bool enumerate_paths( std::vector<Transducer*>& );
346
-
347
- Transducer &reverse( void ); // reverse language
348
- Transducer &operator|( Transducer& ); // union, disjunction
349
- Transducer &operator+( Transducer& ); // concatenation
350
- Transducer &operator/( Transducer& ); // subtraction
351
- Transducer &operator&( Transducer& ); // intersection, conjunction
352
- Transducer &operator||( Transducer& ); // composition
353
- Transducer &operator!( void ); // complement, negation
354
- Transducer &kleene_star( void );
355
- bool operator==( Transducer& ); // minimises its arguments first
356
-
357
- bool is_cyclic( void );
358
- bool is_automaton( void );
359
- bool is_infinitely_ambiguous( void );
360
- bool is_empty( void ); // For efficiency reasons, these functions
361
- bool generates_empty_string( void );// are better called after minimisation
362
-
363
- friend class NodeNumbering;
364
- friend class EdgeCount;
365
- friend class MakeCompactTransducer;
366
- friend std::ostream &operator<<(std::ostream&, Transducer&);
367
- };
368
-
369
- #endif
@@ -1,1842 +0,0 @@
1
- /*******************************************************************/
2
- /* */
3
- /* FILE interface.C */
4
- /* MODULE interface */
5
- /* PROGRAM SFST */
6
- /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
- /* */
8
- /*******************************************************************/
9
-
10
- #include "interface.h"
11
-
12
- #include <fstream>
13
- using std::ifstream;
14
- using std::ofstream;
15
-
16
- #include <set>
17
- using std::set;
18
-
19
- #include "sgi.h"
20
-
21
- using std::cerr;
22
- using std::cout;
23
- using std::vector;
24
-
25
// Strict-weak ordering for C strings: byte-wise comparison via strcmp().
struct ltstr {
  bool operator()(const char* s1, const char* s2) const
  {
    const int order = strcmp(s1, s2);
    return order < 0;
  }
};
29
-
30
// Equality predicate for C strings: true when both have identical content.
struct eqstr {
  bool operator()(const char* s1, const char* s2) const
  {
    const int order = strcmp(s1, s2);
    return order == 0;
  }
};
34
-
35
- typedef set<char*, ltstr> RVarSet; // set of variable names, ordered by string content
36
-
37
- typedef hash_map<char*, Transducer*, hash<const char*>, eqstr> VarMap; // variable name -> transducer
38
-
39
- typedef hash_map<char*, Range*, hash<const char*>, eqstr> SVarMap; // variable name -> character range list
40
-
41
- bool Verbose=false; // when set, read_words()/read_transducer() report progress on stderr
42
-
43
- Alphabet TheAlphabet; // alphabet shared by the functions of this file
44
-
45
- static VarMap VM; // transducer variables, written by def_var(), read by var_value()
46
- static SVarMap SVM; // range variables, written by def_svar(), read by svar_value()
47
- static RVarSet RS; // names recorded by rvar_value(); consumed and cleared by explode()
48
- static RVarSet RSS; // names recorded by rsvar_value(); consumed and cleared by explode()
49
- static int Alphabet_Defined=0; // set to 1 by def_alphabet(); required by make_transducer() for wildcards
50
-
51
-
52
- /*******************************************************************/
53
- /* */
54
- /* error */
55
- /* */
56
- /*******************************************************************/
57
-
58
- static void error( const char *message )
59
-
60
- {
61
- cerr << "\nError: " << message << "\naborted.\n";
62
- exit(1);
63
- }
64
-
65
-
66
- /*******************************************************************/
67
- /* */
68
- /* error2 */
69
- /* */
70
- /*******************************************************************/
71
-
72
- void error2( char *message, char *input )
73
-
74
- {
75
- cerr << "\nError: " << message << ": " << input << "\naborted.\n";
76
- exit(1);
77
- }
78
-
79
-
80
- /*******************************************************************/
81
- /* */
82
- /* symbol_code */
83
- /* */
84
- /*******************************************************************/
85
-
86
- Character symbol_code( char *symbol )
87
-
88
- {
89
- int c=TheAlphabet.symbol2code(symbol);
90
- if (c == EOF)
91
- c = TheAlphabet.add_symbol( symbol );
92
- free(symbol);
93
- return (Character)c;
94
- }
95
-
96
-
97
- /*******************************************************************/
98
- /* */
99
- /* character_code */
100
- /* */
101
- /*******************************************************************/
102
-
103
- Character character_code( unsigned int uc )
104
-
105
- {
106
- if (TheAlphabet.utf8)
107
- return symbol_code(fst_strdup(int2utf8(uc)));
108
-
109
- unsigned char *buffer=(unsigned char*)malloc(2);
110
- buffer[0] = (unsigned char)uc;
111
- buffer[1] = 0;
112
- return symbol_code((char*)buffer);
113
- }
114
-
115
-
116
- /*******************************************************************/
117
- /* */
118
- /* add_value */
119
- /* */
120
- /*******************************************************************/
121
-
122
- Range *add_value( Character c, Range *r )
123
-
124
- {
125
- Range *result=new Range;
126
- result->character = c;
127
- result->next = r;
128
- return result;
129
- }
130
-
131
-
132
- /*******************************************************************/
133
- /* */
134
- /* add_values */
135
- /* */
136
- /*******************************************************************/
137
-
138
- Range *add_values( unsigned int c1, unsigned int c2, Range *r )
139
-
140
- {
141
- for( unsigned int c=c2; c>=c1; c-- )
142
- r = add_value(character_code(c), r);
143
- return r;
144
- }
145
-
146
-
147
- /*******************************************************************/
148
- /* */
149
- /* append_values */
150
- /* */
151
- /*******************************************************************/
152
-
153
- Range *append_values( Range *r2, Range *r )
154
-
155
- {
156
- if (r2 == NULL)
157
- return r;
158
- return add_value(r2->character, append_values(r2->next, r));
159
- }
160
-
161
-
162
- /*******************************************************************/
163
- /* */
164
- /* add_var_values */
165
- /* */
166
- /*******************************************************************/
167
-
168
- Range *add_var_values( char *name, Range *r )
169
-
170
- {
171
- return append_values(svar_value(name), r);
172
- }
173
-
174
-
175
- /*******************************************************************/
176
- /* */
177
- /* in_range */
178
- /* */
179
- /*******************************************************************/
180
-
181
- static bool in_range( unsigned int c, Range *r )
182
-
183
- {
184
- while (r) {
185
- if (r->character == c)
186
- return true;
187
- r = r->next;
188
- }
189
- return false;
190
- }
191
-
192
-
193
- /*******************************************************************/
194
- /* */
195
- /* free_values */
196
- /* */
197
- /*******************************************************************/
198
-
199
- static void free_values( Range *r )
200
-
201
- {
202
- if (r) {
203
- free_values(r->next);
204
- delete r;
205
- }
206
- }
207
-
208
-
209
- /*******************************************************************/
210
- /* */
211
- /* free_values */
212
- /* */
213
- /*******************************************************************/
214
-
215
- static void free_values( Ranges *r )
216
-
217
- {
218
- if (r) {
219
- free_values(r->next);
220
- delete r;
221
- }
222
- }
223
-
224
-
225
- /*******************************************************************/
226
- /* */
227
- /* free_contexts */
228
- /* */
229
- /*******************************************************************/
230
-
231
- static void free_contexts( Contexts *c )
232
-
233
- {
234
- if (c) {
235
- free_contexts(c->next);
236
- delete c;
237
- }
238
- }
239
-
240
-
241
- /*******************************************************************/
242
- /* */
243
- /* copy_values */
244
- /* */
245
- /*******************************************************************/
246
-
247
- static Range *copy_values( const Range *r )
248
-
249
- {
250
- if (r == NULL)
251
- return NULL;
252
- return add_value( r->character, copy_values(r->next));
253
- }
254
-
255
-
256
- /*******************************************************************/
257
- /* */
258
- /* complement_range */
259
- /* */
260
- /*******************************************************************/
261
-
262
- Range *complement_range( Range *r )
263
-
264
- {
265
- vector<Character> sym;
266
- for( Range *p=r; p; p=p->next)
267
- sym.push_back( p->character );
268
- free_values( r );
269
-
270
- TheAlphabet.complement(sym);
271
- if (sym.size() == 0)
272
- error("Empty character range!");
273
-
274
-
275
- Range *result=NULL;
276
- for( size_t i=0; i<sym.size(); i++ ) {
277
- Range *tmp = new Range;
278
- tmp->character = sym[i];
279
- tmp->next = result;
280
- result = tmp;
281
- }
282
-
283
- return result;
284
- }
285
-
286
-
287
- /*******************************************************************/
288
- /* */
289
- /* make_transducer */
290
- /* */
291
- /*******************************************************************/
292
-
293
- static Transducer *make_transducer( Range *r1, Range *r2 )
294
-
295
- {
296
- Transducer *t=new Transducer();
297
- Node *node=t->new_node();
298
- node->set_final(1);
299
-
300
- if (r1 == NULL || r2 == NULL) {
301
- if (!Alphabet_Defined)
302
- error("The wildcard symbol '.' requires the definition of an alphabet");
303
-
304
- // one of the ranges was '.'
305
- for(Alphabet::const_iterator it=TheAlphabet.begin();
306
- it!=TheAlphabet.end(); it++)
307
- if ((r1 == NULL || in_range(it->lower_char(), r1)) &&
308
- (r2 == NULL || in_range(it->upper_char(), r2)))
309
- t->root_node()->add_arc( *it, node, t );
310
- }
311
- else {
312
- for (;;) {
313
- Label l(r1->character, r2->character);
314
- // TheAlphabet.insert(l);
315
- t->root_node()->add_arc( l, node, t );
316
- if (!r1->next && !r2->next)
317
- break;
318
- if (r1->next)
319
- r1 = r1->next;
320
- if (r2->next)
321
- r2 = r2->next;
322
- }
323
- }
324
-
325
- return t;
326
- }
327
-
328
-
329
- /*******************************************************************/
330
- /* */
331
- /* empty_transducer */
332
- /* */
333
- /*******************************************************************/
334
-
335
- static Transducer *empty_transducer()
336
-
337
- {
338
- Transducer *t=new Transducer();
339
- t->root_node()->set_final(1);
340
-
341
- return t;
342
- }
343
-
344
-
345
- /*******************************************************************/
346
- /* */
347
- /* one_label_transducer */
348
- /* */
349
- /*******************************************************************/
350
-
351
- static Transducer *one_label_transducer( Label l )
352
-
353
- {
354
- Transducer *t = new Transducer();
355
- Node *last = t->new_node();
356
- t->root_node()->add_arc( l, last, t );
357
- last->set_final(1);
358
-
359
- return t;
360
- }
361
-
362
-
363
- /*******************************************************************/
364
- /* */
365
- /* new_transducer */
366
- /* */
367
- /*******************************************************************/
368
-
369
- Transducer *new_transducer( Range *r1, Range *r2 )
370
-
371
- {
372
- Transducer *t=make_transducer( r1, r2);
373
- if (r1 != r2)
374
- free_values(r1);
375
- free_values(r2);
376
- return t;
377
- }
378
-
379
-
380
- /*******************************************************************/
381
- /* */
382
- /* read_words */
383
- /* */
384
- /*******************************************************************/
385
-
386
- Transducer *read_words( char *filename )
387
-
388
- {
389
- if (Verbose)
390
- fprintf(stderr,"\nreading words from %s...", filename);
391
- ifstream is(filename);
392
- if (!is.is_open()) {
393
- static char message[1000];
394
- sprintf(message,"Error: Cannot open file \"%s\"!", filename);
395
- throw message;
396
- }
397
- free( filename );
398
- Transducer *t = new Transducer(is, &TheAlphabet, Verbose);
399
- is.close();
400
- TheAlphabet.insert_symbols(t->alphabet);
401
- if (Verbose)
402
- fprintf(stderr,"finished\n");
403
- return t;
404
- }
405
-
406
-
407
- /*******************************************************************/
408
- /* */
409
- /* read_transducer */
410
- /* */
411
- /*******************************************************************/
412
-
413
- Transducer *read_transducer( char *filename )
414
-
415
- {
416
- if (Verbose)
417
- fprintf(stderr,"\nreading transducer from %s...", filename);
418
- FILE *file = fopen(filename,"rb");
419
- if (file == NULL) {
420
- static char message[1000];
421
- sprintf(message,"Error: Cannot open file \"%s\"!",filename);
422
- throw message;
423
- }
424
- Transducer t(file);
425
- fclose(file);
426
- if (t.alphabet.utf8 != TheAlphabet.utf8) {
427
- static char message[1000];
428
- sprintf(message,"Error: incompatible character encoding in file \"%s\"!",
429
- filename);
430
- throw message;
431
- }
432
- free( filename );
433
- Transducer *nt = &t.copy(false, &TheAlphabet);
434
- TheAlphabet.insert_symbols(nt->alphabet);
435
- if (Verbose)
436
- fprintf(stderr,"finished\n");
437
- return nt;
438
- }
439
-
440
-
441
- /*******************************************************************/
442
- /* */
443
- /* def_alphabet */
444
- /* */
445
- /*******************************************************************/
446
-
447
- void def_alphabet( Transducer *t )
448
-
449
- {
450
- t = explode(t);
451
- t = minimise(t);
452
- t->alphabet.clear_char_pairs();
453
- t->complete_alphabet();
454
- TheAlphabet.clear_char_pairs();
455
- TheAlphabet.copy(t->alphabet);
456
- Alphabet_Defined = 1;
457
- delete t;
458
- }
459
-
460
-
461
- /*******************************************************************/
462
- /* */
463
- /* def_svar definition of a value range variable */
464
- /* */
465
- /*******************************************************************/
466
-
467
- bool def_svar( char *name, Range *r )
468
-
469
- {
470
- // delete the old value of the variable
471
- SVarMap::iterator it=SVM.find(name);
472
- if (it != SVM.end()) {
473
- char *n=it->first;
474
- Range *v=it->second;
475
- SVM.erase(it);
476
- delete v;
477
- free(n);
478
- }
479
- SVM[name] = r;
480
- return r == NULL;
481
- }
482
-
483
-
484
- /*******************************************************************/
485
- /* */
486
- /* svar_value */
487
- /* */
488
- /*******************************************************************/
489
-
490
- Range *svar_value( char *name )
491
-
492
- {
493
- SVarMap::iterator it=SVM.find(name);
494
- if (it == SVM.end())
495
- error2("undefined variable", name);
496
- free(name);
497
- return copy_values(it->second);
498
- }
499
-
500
-
501
- /*******************************************************************/
502
- /* */
503
- /* rsvar_value */
504
- /* */
505
- /*******************************************************************/
506
-
507
- Range *rsvar_value( char *name )
508
-
509
- {
510
- if (RSS.find(name) == RSS.end())
511
- RSS.insert(fst_strdup(name));
512
- return add_value(symbol_code(name), NULL);
513
- }
514
-
515
-
516
- /*******************************************************************/
517
- /* */
518
- /* def_var definition of an transducer variable */
519
- /* */
520
- /*******************************************************************/
521
-
522
- bool def_var( char *name, Transducer *t )
523
-
524
- {
525
- // delete the old value of the variable
526
- VarMap::iterator it=VM.find(name);
527
- if (it != VM.end()) {
528
- char *n=it->first;
529
- Transducer *v=it->second;
530
- VM.erase(it);
531
- delete v;
532
- free(n);
533
- }
534
-
535
- t = explode(t);
536
- t = minimise(t);
537
-
538
- VM[name] = t;
539
- return t->is_empty();
540
- }
541
-
542
-
543
- /*******************************************************************/
544
- /* */
545
- /* def_rvar definition of an agreement variable for automata */
546
- /* */
547
- /*******************************************************************/
548
-
549
- bool def_rvar( char *name, Transducer *t )
550
-
551
- {
552
- if (t->is_cyclic())
553
- error2("cyclic transducer assigned to", name);
554
- return def_var( name, t );
555
- }
556
-
557
-
558
- /*******************************************************************/
559
- /* */
560
- /* var_value */
561
- /* */
562
- /*******************************************************************/
563
-
564
- Transducer *var_value( char *name )
565
-
566
- {
567
- VarMap::iterator it=VM.find(name);
568
- if (it == VM.end())
569
- error2("undefined variable", name);
570
- free(name);
571
- return &(it->second->copy());
572
- }
573
-
574
-
575
- /*******************************************************************/
576
- /* */
577
- /* rvar_value */
578
- /* */
579
- /*******************************************************************/
580
-
581
- Transducer *rvar_value( char *name )
582
-
583
- {
584
- if (RS.find(name) == RS.end())
585
- RS.insert(fst_strdup(name));
586
- Range *r=add_value(symbol_code(name), NULL);
587
- return new_transducer(r,r);
588
- }
589
-
590
-
591
- /*******************************************************************/
592
- /* */
593
- /* explode */
594
- /* */
595
- /*******************************************************************/
596
-
597
- Transducer *explode( Transducer *t )
598
-
599
- {
600
- if (RS.size() == 0 && RSS.size() == 0)
601
- return t;
602
-
603
- t = minimise(t);
604
-
605
- vector<char*> name;
606
- for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
607
- name.push_back(*it);
608
- RS.clear();
609
-
610
- // replace all agreement variables
611
- for( size_t i=0; i<name.size(); i++ ) {
612
- Transducer *nt = NULL;
613
- Label l(TheAlphabet.symbol2code(name[i]));
614
- Transducer *vt=var_value(name[i]);
615
-
616
- // enumerate all paths of the transducer
617
- vector<Transducer*> it;
618
- vt->enumerate_paths(it);
619
- delete vt;
620
-
621
- // insert each path
622
- for( size_t i=0; i<it.size(); i++ ) {
623
-
624
- // insertion
625
- Transducer *t1 = &t->splice(l, it[i]);
626
- delete it[i];
627
-
628
- if (nt == NULL)
629
- nt = t1;
630
- else
631
- nt = disjunction(nt, t1);
632
- }
633
- delete t;
634
- t = nt;
635
- }
636
-
637
- name.clear();
638
- for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
639
- name.push_back(*it);
640
- RSS.clear();
641
-
642
- // replace all agreement variables
643
- for( size_t i=0; i<name.size(); i++ ) {
644
- Transducer *nt = NULL;
645
- Character c=TheAlphabet.symbol2code(name[i]);
646
- Range *r=svar_value(name[i]);
647
-
648
- // insert each character
649
- while (r != NULL) {
650
-
651
- // insertion
652
- Transducer *t1 = &t->replace_char(c, r->character);
653
-
654
- if (nt == NULL)
655
- nt = t1;
656
- else
657
- nt = disjunction(nt, t1);
658
-
659
- Range *next = r->next;
660
- delete r;
661
- r = next;
662
- }
663
- delete t;
664
- t = nt;
665
- }
666
-
667
- return t;
668
- }
669
-
670
-
671
- /*******************************************************************/
672
- /* */
673
- /* catenate */
674
- /* */
675
- /*******************************************************************/
676
-
677
- Transducer *catenate( Transducer *t1, Transducer *t2 )
678
-
679
- {
680
- Transducer *t = &(*t1 + *t2);
681
- delete t1;
682
- delete t2;
683
- return t;
684
- }
685
-
686
-
687
- /*******************************************************************/
688
- /* */
689
- /* add_range */
690
- /* */
691
- /*******************************************************************/
692
-
693
- Ranges *add_range( Range *r, Ranges *l )
694
-
695
- {
696
- Ranges *result = new Ranges;
697
- result->range = r;
698
- result->next = l;
699
- return result;
700
- }
701
-
702
-
703
- /*******************************************************************/
704
- /* */
705
- /* make_mapping */
706
- /* */
707
- /*******************************************************************/
708
-
709
- Transducer *make_mapping( Ranges *list1, Ranges *list2 )
710
-
711
- {
712
- Ranges *l1=list1;
713
- Ranges *l2=list2;
714
- Transducer *t=new Transducer();
715
-
716
- Node *node=t->root_node();
717
- while (l1 && l2) {
718
- Node *nn=t->new_node();
719
- for( Range *r1=l1->range; r1; r1=r1->next )
720
- for( Range *r2=l2->range; r2; r2=r2->next )
721
- node->add_arc( Label(r1->character, r2->character), nn, t );
722
- node = nn;
723
- l1 = l1->next;
724
- l2 = l2->next;
725
- }
726
- while (l1) {
727
- Node *nn=t->new_node();
728
- for( Range *r1=l1->range; r1; r1=r1->next )
729
- node->add_arc( Label(r1->character, Label::epsilon), nn, t );
730
- node = nn;
731
- l1 = l1->next;
732
- }
733
- while (l2) {
734
- Node *nn=t->new_node();
735
- for( Range *r2=l2->range; r2; r2=r2->next )
736
- node->add_arc( Label(Label::epsilon, r2->character), nn, t );
737
- node = nn;
738
- l2 = l2->next;
739
- }
740
- node->set_final(1);
741
-
742
- free_values(list1);
743
- free_values(list2);
744
- return t;
745
- }
746
-
747
-
748
- /*******************************************************************/
749
- /* */
750
- /* disjunction */
751
- /* */
752
- /*******************************************************************/
753
-
754
- Transducer *disjunction( Transducer *t1, Transducer *t2 )
755
-
756
- {
757
- Transducer *t = &(*t1 | *t2);
758
- delete t1;
759
- delete t2;
760
- return t;
761
- }
762
-
763
-
764
- /*******************************************************************/
765
- /* */
766
- /* conjunction */
767
- /* */
768
- /*******************************************************************/
769
-
770
- Transducer *conjunction( Transducer *t1, Transducer *t2 )
771
-
772
- {
773
- if (RS.size() > 0 || RSS.size() > 0)
774
- cerr << "\nWarning: agreement operation inside of conjunction!\n";
775
- Transducer *t = &(*t1 & *t2);
776
- delete t1;
777
- delete t2;
778
- return t;
779
- }
780
-
781
-
782
- /*******************************************************************/
783
- /* */
784
- /* subtraction */
785
- /* */
786
- /*******************************************************************/
787
-
788
- Transducer *subtraction( Transducer *t1, Transducer *t2 )
789
-
790
- {
791
- if (RS.size() > 0 || RSS.size() > 0)
792
- cerr << "\nWarning: agreement operation inside of conjunction!\n";
793
- Transducer *t = &(*t1 / *t2);
794
- delete t1;
795
- delete t2;
796
- return t;
797
- }
798
-
799
-
800
- /*******************************************************************/
801
- /* */
802
- /* composition */
803
- /* */
804
- /*******************************************************************/
805
-
806
- Transducer *composition( Transducer *t1, Transducer *t2 )
807
-
808
- {
809
- if (RS.size() > 0 || RSS.size() > 0)
810
- cerr << "\nWarning: agreement operation inside of composition!\n";
811
- Transducer *t = &(*t1 || *t2);
812
- delete t1;
813
- delete t2;
814
- return t;
815
- }
816
-
817
- /*******************************************************************/
818
- /* */
819
- /* freely_insert */
820
- /* */
821
- /*******************************************************************/
822
-
823
- Transducer *freely_insert( Transducer *t, Character lc, Character uc )
824
-
825
- {
826
- return &t->freely_insert(Label(lc,uc));
827
- }
828
-
829
-
830
- /*******************************************************************/
831
- /* */
832
- /* negation */
833
- /* */
834
- /*******************************************************************/
835
-
836
- Transducer *negation( Transducer *t )
837
-
838
- {
839
- if (RS.size() > 0 || RSS.size() > 0)
840
- cerr << "\nWarning: agreement operation inside of negation!\n";
841
- if (!Alphabet_Defined)
842
- error("Negation requires the definition of an alphabet");
843
- t->alphabet.clear_char_pairs();
844
- t->alphabet.copy(TheAlphabet);
845
- Transducer *nt = &(!*t);
846
- delete t;
847
- return nt;
848
- }
849
-
850
-
851
- /*******************************************************************/
852
- /* */
853
- /* upper_level */
854
- /* */
855
- /*******************************************************************/
856
-
857
- Transducer *upper_level( Transducer *t )
858
-
859
- {
860
- Transducer *nt = &t->upper_level();
861
- delete t;
862
- return nt;
863
- }
864
-
865
-
866
- /*******************************************************************/
867
- /* */
868
- /* lower_level */
869
- /* */
870
- /*******************************************************************/
871
-
872
- Transducer *lower_level( Transducer *t )
873
-
874
- {
875
- Transducer *nt = &t->lower_level();
876
- delete t;
877
- return nt;
878
- }
879
-
880
-
881
- /*******************************************************************/
882
- /* */
883
- /* minimise */
884
- /* */
885
- /*******************************************************************/
886
-
887
- Transducer *minimise( Transducer *t )
888
-
889
- {
890
- t->alphabet.copy(TheAlphabet);
891
- Transducer *nt = &t->minimise( Verbose );
892
- delete t;
893
- return nt;
894
- }
895
-
896
-
897
- /*******************************************************************/
898
- /* */
899
- /* switch_levels */
900
- /* */
901
- /*******************************************************************/
902
-
903
- Transducer *switch_levels( Transducer *t )
904
-
905
- {
906
- Transducer *nt = &t->switch_levels();
907
- delete t;
908
- return nt;
909
- }
910
-
911
-
912
- /*******************************************************************/
913
- /* */
914
- /* repetition */
915
- /* */
916
- /*******************************************************************/
917
-
918
- Transducer *repetition( Transducer *t )
919
-
920
- {
921
- Transducer *nt = &(t->kleene_star());
922
- delete t;
923
- return nt;
924
- }
925
-
926
-
927
- /*******************************************************************/
928
- /* */
929
- /* repetition2 */
930
- /* */
931
- /*******************************************************************/
932
-
933
- Transducer *repetition2( Transducer *t )
934
-
935
- {
936
- Transducer *t1 = &(t->kleene_star());
937
- Transducer *nt = &(*t + *t1);
938
- delete t;
939
- delete t1;
940
- return nt;
941
- }
942
-
943
-
944
- /*******************************************************************/
945
- /* */
946
- /* optional */
947
- /* */
948
- /*******************************************************************/
949
-
950
- Transducer *optional( Transducer *t )
951
-
952
- {
953
- Transducer *nt = &(t->copy());
954
- nt->root_node()->set_final(1);
955
- delete t;
956
- return nt;
957
- }
958
-
959
-
960
- /*******************************************************************/
961
- /* */
962
- /* add_pi_transitions */
963
- /* */
964
- /*******************************************************************/
965
-
966
- static void add_pi_transitions( Transducer *t, Node *node, Alphabet &alph )
967
-
968
- {
969
- for( Alphabet::const_iterator it=alph.begin(); it!=alph.end(); it++)
970
- node->add_arc( *it, node, t );
971
- }
972
-
973
-
974
- /*******************************************************************/
975
- /* */
976
- /* pi_machine */
977
- /* */
978
- /*******************************************************************/
979
-
980
- static Transducer *pi_machine( Alphabet &alph )
981
-
982
- {
983
- Transducer *t=new Transducer();
984
- t->root_node()->set_final(1);
985
- add_pi_transitions( t, t->root_node(), alph );
986
- return t;
987
- }
988
-
989
-
990
- /*******************************************************************/
991
- /* */
992
- /* cp */
993
- /* */
994
- /*******************************************************************/
995
-
996
- static Transducer *cp( Range *lower_range, Range *upper_range )
997
-
998
- {
999
- return make_transducer(lower_range, upper_range);
1000
- }
1001
-
1002
-
1003
- /*******************************************************************/
1004
- /* */
1005
- /* anti_cp */
1006
- /* */
1007
- /*******************************************************************/
1008
-
1009
- static Transducer *anti_cp( Range *lower_range, Range *upper_range )
1010
-
1011
- {
1012
- Transducer *cpt = cp(lower_range, upper_range);
1013
- Transducer *t=new Transducer();
1014
- Node *node=t->new_node();
1015
-
1016
- node->set_final(1);
1017
- for(Alphabet::const_iterator it=TheAlphabet.begin();
1018
- it!=TheAlphabet.end(); it++){
1019
- Label l=*it;
1020
- if (in_range(l.lower_char(), lower_range) &&
1021
- !cpt->root_node()->target_node(l))
1022
- t->root_node()->add_arc( l, node, t );
1023
- }
1024
- if (in_range(Label::epsilon, lower_range) &&
1025
- !cpt->root_node()->target_node(Label()))
1026
- t->root_node()->add_arc( Label(), node, t );
1027
-
1028
- delete cpt;
1029
- return t;
1030
- }
1031
-
1032
-
1033
- /*******************************************************************/
1034
- /* */
1035
- /* twol_right_rule */
1036
- /* */
1037
- /*******************************************************************/
1038
-
1039
- static Transducer *twol_right_rule( Transducer *lc, Range *lower_range,
1040
- Range *upper_range, Transducer *rc )
1041
-
1042
- {
1043
- // Build the rule transducer
1044
- Transducer *cpt = cp(lower_range, upper_range);
1045
- Transducer *pi=pi_machine(TheAlphabet);
1046
-
1047
- // First unwanted language
1048
-
1049
- lc->alphabet.copy(TheAlphabet);
1050
- Transducer *notlc = &(!*lc);
1051
- Transducer *tmp = &(*notlc + *cpt);
1052
- delete notlc;
1053
- Transducer *t1 = &(*tmp + *pi);
1054
- delete tmp;
1055
-
1056
- // Second unwanted language
1057
- rc->alphabet.copy(TheAlphabet);
1058
- Transducer *notrc = &(!*rc);
1059
- tmp = &(*cpt + *notrc);
1060
- delete cpt;
1061
- delete notrc;
1062
- Transducer *t2 = &(*pi + *tmp);
1063
- delete pi;
1064
- delete tmp;
1065
-
1066
- tmp = &(*t1|*t2);
1067
- delete t1;
1068
- delete t2;
1069
-
1070
- tmp->alphabet.copy(TheAlphabet);
1071
- t1 = &(!*tmp);
1072
- delete tmp;
1073
-
1074
- return t1;
1075
- }
1076
-
1077
-
1078
- /*******************************************************************/
1079
- /* */
1080
- /* twol_left_rule */
1081
- /* */
1082
- /*******************************************************************/
1083
-
1084
- static Transducer *twol_left_rule( Transducer *lc, Range *lower_range,
1085
- Range *upper_range, Transducer *rc )
1086
-
1087
- {
1088
- // check for problematic insertion operations like "$L <> <= a $R"
1089
- // where either $L or $R includes the empty string
1090
- if (in_range(Label::epsilon, lower_range)) {
1091
- if (lc->generates_empty_string())
1092
- error("in two level rule: insertion operation with deletable left context!");
1093
- if (rc->generates_empty_string())
1094
- error("in two level rule: insertion operation with deletable right context!");
1095
- cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
1096
- }
1097
-
1098
- // Build the rule transducer
1099
- Transducer *t1 = anti_cp(lower_range, upper_range);
1100
-
1101
- // Add the left context;
1102
- Transducer *t2 = &(*lc + *t1);
1103
- delete t1;
1104
-
1105
- // Add the right context;
1106
- t1 = &(*t2 + *rc);
1107
- delete t2;
1108
-
1109
- // Form the complement
1110
- t1->alphabet.copy(TheAlphabet);
1111
- t2 = &(!*t1);
1112
- delete t1;
1113
-
1114
- return t2;
1115
- }
1116
-
1117
-
1118
- /*******************************************************************/
1119
- /* */
1120
- /* make_rule */
1121
- /* */
1122
- /*******************************************************************/
1123
-
1124
- Transducer *make_rule( Transducer *lc, Range *lower_range, Twol_Type type,
1125
- Range *upper_range, Transducer *rc )
1126
-
1127
- {
1128
- if (RS.size() > 0 || RSS.size() > 0)
1129
- cerr << "\nWarning: agreement operation inside of replacement rule!\n";
1130
-
1131
- if (!Alphabet_Defined)
1132
- error("Two level rules require the definition of an alphabet");
1133
-
1134
- // expand the left and the right contexts to their full length
1135
- Transducer *pi=pi_machine(TheAlphabet);
1136
-
1137
- if (lc == NULL)
1138
- lc = pi_machine(TheAlphabet);
1139
- else {
1140
- Transducer *tmp = &(*pi + *lc);
1141
- delete lc;
1142
- lc = tmp;
1143
- }
1144
- if (rc == NULL)
1145
- rc = pi_machine(TheAlphabet);
1146
- else {
1147
- Transducer *tmp = &(*rc + *pi);
1148
- delete rc;
1149
- rc = tmp;
1150
- }
1151
- delete pi;
1152
-
1153
- Transducer *result = NULL;
1154
-
1155
- switch (type) {
1156
- case twol_left:
1157
- result = twol_left_rule(lc, lower_range, upper_range, rc);
1158
- break;
1159
- case twol_right:
1160
- result = twol_right_rule(lc, lower_range, upper_range, rc);
1161
- break;
1162
- case twol_both:
1163
- {
1164
- Transducer *t1 = twol_left_rule(lc, lower_range, upper_range, rc);
1165
- Transducer *t2 = twol_right_rule(lc, lower_range, upper_range, rc);
1166
- result = &(*t1 & *t2);
1167
- delete t1;
1168
- delete t2;
1169
- }
1170
- }
1171
- delete lc;
1172
- delete rc;
1173
- if (lower_range != upper_range)
1174
- free_values(lower_range);
1175
- free_values(upper_range);
1176
-
1177
- return minimise(result);
1178
- }
1179
-
1180
-
1181
- /*******************************************************************/
1182
- /* */
1183
- /* make_context */
1184
- /* */
1185
- /*******************************************************************/
1186
-
1187
- Contexts *make_context( Transducer *l, Transducer *r )
1188
-
1189
- {
1190
- if (l == NULL)
1191
- l = empty_transducer();
1192
- if (r == NULL)
1193
- r = empty_transducer();
1194
-
1195
- Contexts *c=new Contexts();
1196
- c->left = l;
1197
- c->right = r;
1198
- c->next = NULL;
1199
-
1200
- return c;
1201
- }
1202
-
1203
-
1204
- /*******************************************************************/
1205
- /* */
1206
- /* add_context */
1207
- /* */
1208
- /*******************************************************************/
1209
-
1210
- Contexts *add_context( Contexts *nc, Contexts *c )
1211
-
1212
- {
1213
- nc->next = c;
1214
- return nc;
1215
- }
1216
-
1217
-
1218
- /*******************************************************************/
1219
- /* */
1220
- /* restriction_transducer */
1221
- /* */
1222
- /*******************************************************************/
1223
-
1224
- static Transducer *restriction_transducer( Transducer *l1, Transducer *l2,
1225
- Character marker )
1226
-
1227
- {
1228
- l1->alphabet.copy(TheAlphabet);
1229
- Transducer *t1 = &(*l1 / *l2);
1230
-
1231
- Transducer *t2 = &t1->replace_char(marker, Label::epsilon);
1232
- delete t1;
1233
-
1234
- t2->alphabet.copy(TheAlphabet);
1235
- t1 = &(!*t2);
1236
- delete t2;
1237
-
1238
- return t1;
1239
- }
1240
-
1241
-
1242
- /*******************************************************************/
1243
- /* */
1244
- /* marker_transducer */
1245
- /* */
1246
- /*******************************************************************/
1247
-
1248
- static Transducer *marker_transducer( Transducer *t, Contexts *c,
1249
- Character &marker )
1250
- {
1251
- marker = TheAlphabet.new_marker();
1252
- Transducer *result = one_label_transducer( Label(marker) );
1253
-
1254
- // build the alphabet with a new marker
1255
- result->alphabet.insert_symbols(t->alphabet);
1256
- while (c) {
1257
- result->alphabet.insert_symbols(c->left->alphabet);
1258
- result->alphabet.insert_symbols(c->right->alphabet);
1259
- c = c->next;
1260
- }
1261
-
1262
- return result;
1263
- }
1264
-
1265
-
1266
- /*******************************************************************/
1267
- /* */
1268
- /* center_transducer */
1269
- /* */
1270
- /*******************************************************************/
1271
-
1272
- static Transducer *center_transducer( Transducer *t, Transducer *pi,
1273
- Transducer *mt )
1274
- {
1275
- // create the concatenation pi + mt + *t + mt + pi
1276
- Transducer *t1=&(*pi + *mt);
1277
- Transducer *t2=&(*t1 + *t);
1278
- delete t1;
1279
- t1 = &(*t2 + *mt);
1280
- delete t2;
1281
- t2 = &(*t1 + *pi);
1282
- delete t1;
1283
- return t2;
1284
- }
1285
-
1286
-
1287
- /*******************************************************************/
1288
- /* */
1289
- /* context_transducer */
1290
- /* */
1291
- /*******************************************************************/
1292
-
1293
- static Transducer *context_transducer( Transducer *t, Transducer *pi,
1294
- Transducer *mt, Contexts *c )
1295
- {
1296
- // pi + left[i] + mt + pi + mt + right[i] + pi
1297
-
1298
- Transducer *t1 = &(*mt + *t);
1299
- Transducer *tmp = &(*t1 + *mt);
1300
- delete t1;
1301
- Transducer *result=NULL;
1302
-
1303
- while (c) {
1304
- t1 = &(*pi + *c->left);
1305
- Transducer *t2 = &(*t1 + *tmp);
1306
- delete t1;
1307
- t1 = &(*t2 + *c->right);
1308
- delete t2;
1309
- t2 = &(*t1 + *pi);
1310
- delete t1;
1311
-
1312
- if (result) {
1313
- t1 = &(*result | *t2);
1314
- delete t2;
1315
- result = t1;
1316
- }
1317
- else
1318
- result = t2;
1319
-
1320
- c = c->next;
1321
- }
1322
- delete tmp;
1323
-
1324
- return result;
1325
- }
1326
-
1327
-
1328
-
1329
- /*******************************************************************/
1330
- /* */
1331
- /* result_transducer */
1332
- /* */
1333
- /*******************************************************************/
1334
-
1335
- static Transducer *result_transducer( Transducer *l1, Transducer *l2,
1336
- Twol_Type type, Character marker )
1337
- {
1338
- Transducer *result=NULL;
1339
- if (type == twol_right)
1340
- result = restriction_transducer( l1, l2, marker );
1341
- else if (type == twol_left)
1342
- result = restriction_transducer( l2, l1, marker );
1343
- else if (type == twol_both) {
1344
- Transducer *t1 = restriction_transducer( l1, l2, marker );
1345
- Transducer *t2 = restriction_transducer( l2, l1, marker );
1346
- result = &(*t1 & *t2);
1347
- delete t1;
1348
- delete t2;
1349
- }
1350
-
1351
- return result;
1352
- }
1353
-
1354
-
1355
- /*******************************************************************/
1356
- /* */
1357
- /* restriction */
1358
- /* */
1359
- /*******************************************************************/
1360
-
1361
- Transducer *restriction( Transducer *t, Twol_Type type, Contexts *c,
1362
- int direction )
1363
- {
1364
- Character marker;
1365
- Transducer *mt=marker_transducer( t, c, marker );
1366
- Transducer *pi=pi_machine(TheAlphabet);
1367
- Transducer *l1=center_transducer( t, pi, mt );
1368
-
1369
- Transducer *tmp;
1370
- if (direction == 0)
1371
- tmp = pi;
1372
- else if (direction == 1) {
1373
- // compute _t || .*
1374
- Transducer *t1 = &t->lower_level();
1375
- tmp = &(*t1 || *pi);
1376
- delete t1;
1377
- }
1378
- else {
1379
- // compute ^t || .*
1380
- Transducer *t1 = &t->upper_level();
1381
- tmp = &(*pi || *t1);
1382
- delete t1;
1383
- }
1384
- delete t;
1385
-
1386
- Transducer *l2=context_transducer( tmp, pi, mt, c );
1387
- if (tmp != pi)
1388
- delete tmp;
1389
- delete pi;
1390
- delete mt;
1391
-
1392
- Transducer *result=result_transducer( l1, l2, type, marker );
1393
- delete l1;
1394
- delete l2;
1395
-
1396
- free_contexts( c );
1397
-
1398
- return result;
1399
- }
1400
-
1401
-
1402
- /*******************************************************************/
1403
- /* */
1404
- /* constrain_boundary_transducer */
1405
- /* */
1406
- /*******************************************************************/
1407
-
1408
- static Transducer *constrain_boundary_transducer( Character leftm,
1409
- Character rightm )
1410
- {
1411
- // create the transducer (.|<L>|<R>)*
1412
-
1413
- Transducer *tmp=pi_machine(TheAlphabet);
1414
-
1415
- // create the transducer (.|<L>|<R>)* <L><R> (.|<L>|<R>)*
1416
- Node *root = tmp->root_node();
1417
- Node *node = tmp->new_node();
1418
- Node *last = tmp->new_node();
1419
-
1420
- root->set_final(0);
1421
- last->set_final(1);
1422
-
1423
- root->add_arc( Label(leftm), node, tmp);
1424
- node->add_arc( Label(rightm), last, tmp);
1425
-
1426
- add_pi_transitions( tmp, last, TheAlphabet );
1427
-
1428
- // create the transducer !((.|<L>|<R>)* <L><R> (.|<L>|<R>)*)
1429
- tmp->alphabet.copy(TheAlphabet);
1430
- Transducer *result = &(!*tmp);
1431
- delete tmp;
1432
-
1433
- return result;
1434
- }
1435
-
1436
-
1437
- /*******************************************************************/
1438
- /* */
1439
- /* extended_left_transducer */
1440
- /* */
1441
- /*******************************************************************/
1442
-
1443
- static Transducer *extended_left_transducer( Transducer *t,
1444
- Character m1, Character m2 )
1445
- {
1446
- if (t == NULL) // empty context
1447
- return pi_machine(TheAlphabet);
1448
-
1449
- // Extended left context transducer
1450
-
1451
- // <R> >> (<L> >> $T$)
1452
- Transducer *tmp=&t->freely_insert( Label(m1) );
1453
- delete t;
1454
- t = &tmp->freely_insert( Label(m2) );
1455
- delete tmp;
1456
-
1457
- // .* (<R> >> (<L> >> $T$))
1458
- add_pi_transitions( t, t->root_node(), TheAlphabet );
1459
-
1460
- // !(.*<L>)
1461
- tmp = one_label_transducer(Label(m1));
1462
- add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1463
- tmp->alphabet.copy(TheAlphabet);
1464
- Transducer *t2 = &(!*tmp);
1465
- delete tmp;
1466
-
1467
- // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1468
- tmp = &(*t || *t2);
1469
- delete t;
1470
- delete t2;
1471
-
1472
- return tmp;
1473
- }
1474
-
1475
-
1476
- /*******************************************************************/
1477
- /* */
1478
- /* left_context */
1479
- /* */
1480
- /*******************************************************************/
1481
-
1482
- static Transducer *left_context( Transducer *t, Character m1, Character m2 )
1483
-
1484
- {
1485
- // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1486
- Transducer *ct = extended_left_transducer(t, m1, m2);
1487
-
1488
- // <R>* <L> .*
1489
- Transducer *mt = one_label_transducer(Label(m1));
1490
- mt->root_node()->add_arc(Label(m2), mt->root_node(), mt );
1491
- add_pi_transitions(mt, mt->root_node()->target_node(Label(m1)),TheAlphabet);
1492
-
1493
- ct->alphabet.copy(TheAlphabet);
1494
- Transducer *no_ct = &!*ct;
1495
-
1496
- mt->alphabet.copy(TheAlphabet);
1497
- Transducer *no_mt = &!*mt;
1498
-
1499
- {
1500
- static int print=1;
1501
- if (print) {
1502
- print = 0;
1503
- Transducer *temp = &(ct->copy());
1504
- temp = &(no_ct->copy());
1505
- temp = &(mt->copy());
1506
- temp = &(no_mt->copy());
1507
- }
1508
- }
1509
-
1510
- Transducer *t1 = &(*no_ct + *mt);
1511
- delete no_ct;
1512
- delete mt;
1513
-
1514
- Transducer *t2 = &(*ct + *no_mt);
1515
- delete ct;
1516
- delete no_mt;
1517
-
1518
- Transducer *tmp = &(*t1 | *t2);
1519
- delete t1;
1520
- delete t2;
1521
-
1522
- tmp->alphabet.copy(TheAlphabet);
1523
- t1 = &!*tmp;
1524
- delete tmp;
1525
-
1526
- return t1;
1527
- }
1528
-
1529
-
1530
- /*******************************************************************/
1531
- /* */
1532
- /* make_optional */
1533
- /* */
1534
- /*******************************************************************/
1535
-
1536
- static Transducer *make_optional( Transducer *t )
1537
-
1538
- {
1539
- Transducer *t1 = pi_machine(TheAlphabet);
1540
- Transducer *t2 = &(*t | *t1);
1541
- delete t;
1542
- delete t1;
1543
- return t2;
1544
- }
1545
-
1546
-
1547
- /*******************************************************************/
1548
- /* */
1549
- /* replace */
1550
- /* */
1551
- /*******************************************************************/
1552
-
1553
- Transducer *replace( Transducer *ct, Repl_Type type, bool optional )
1554
-
1555
- {
1556
- // compute the no-center transducer
1557
- Transducer *tmp;
1558
-
1559
- if (type == repl_up)
1560
- // _ct
1561
- tmp = &ct->lower_level();
1562
- else if (type == repl_down)
1563
- // ^ct
1564
- tmp = &ct->upper_level();
1565
- else
1566
- error("Invalid type of replace operator");
1567
-
1568
- // .* _ct
1569
- add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1570
-
1571
- // .* _ct .*
1572
- Transducer *t2 = pi_machine(TheAlphabet);
1573
- Transducer *t3 = &(*tmp + *t2);
1574
- delete tmp;
1575
- delete t2;
1576
-
1577
- // no_ct = !(.* _ct .*)
1578
- t3->alphabet.copy(TheAlphabet);
1579
- Transducer *no_ct = &(!*t3);
1580
- delete t3;
1581
-
1582
- // compute the unconditional replacement transducer
1583
-
1584
- // no-ct ct
1585
- tmp = &(*no_ct + *ct);
1586
- delete ct;
1587
-
1588
- // (no-ct ct)*
1589
- t2 = &(tmp->kleene_star());
1590
- delete tmp;
1591
-
1592
- // (no-ct ct)* no-ct
1593
- tmp = &(*t2 + *no_ct);
1594
- delete t2;
1595
- delete no_ct;
1596
-
1597
- if (optional)
1598
- tmp = make_optional(tmp);
1599
-
1600
- return tmp;
1601
- }
1602
-
1603
-
1604
- /*******************************************************************/
1605
- /* */
1606
- /* replace_transducer */
1607
- /* */
1608
- /*******************************************************************/
1609
-
1610
- static Transducer *replace_transducer( Transducer *ct, Character lm,
1611
- Character rm, Repl_Type type )
1612
- {
1613
- // insert boundary markers into the center transducer
1614
-
1615
- // <L> >> (<R> >> $Center$)
1616
- Transducer *tmp = &ct->freely_insert(Label(lm));
1617
- delete ct;
1618
- ct = &tmp->freely_insert(Label(rm));
1619
- delete tmp;
1620
-
1621
- // add surrounding boundary markers to the center transducer
1622
-
1623
- // <L> (<L> >> (<R> >> $Center$))
1624
- Transducer *t2 = one_label_transducer( Label(lm) );
1625
- tmp = &(*t2 + *ct);
1626
- delete t2;
1627
- delete ct;
1628
-
1629
- // $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
1630
- t2 = one_label_transducer( Label(rm) );
1631
- ct = &(*tmp + *t2);
1632
- delete tmp;
1633
- delete t2;
1634
-
1635
- return replace(ct, type, false);
1636
- }
1637
-
1638
-
1639
- /*******************************************************************/
1640
- /* */
1641
- /* replace_in_context */
1642
- /* */
1643
- /*******************************************************************/
1644
-
1645
- Transducer *replace_in_context( Transducer *t, Repl_Type type, Contexts *c,
1646
- bool optional )
1647
- {
1648
- // The implementation of the replace operators is based on
1649
- // "The Replace Operator" by Lauri Karttunen
1650
-
1651
- if (!Alphabet_Defined)
1652
- error("The replace operators require the definition of an alphabet");
1653
-
1654
- if (!c->left->is_automaton() || !c->right->is_automaton())
1655
- error("The replace operators require automata as context expressions!");
1656
-
1657
- // create the marker symbols
1658
- Character leftm = TheAlphabet.new_marker();
1659
- Character rightm = TheAlphabet.new_marker();
1660
-
1661
- /////////////////////////////////////////////////////////////
1662
- // Create the insert boundaries transducer (.|<>:<L>|<>:<R>)*
1663
- /////////////////////////////////////////////////////////////
1664
-
1665
- Transducer *ibt=pi_machine(TheAlphabet);
1666
- Node *root=ibt->root_node();
1667
- root->add_arc( Label(Label::epsilon, leftm), root, ibt);
1668
- root->add_arc( Label(Label::epsilon, rightm),root, ibt);
1669
-
1670
- /////////////////////////////////////////////////////////////
1671
- // Create the remove boundaries transducer (.|<L>:<>|<R>:<>)*
1672
- /////////////////////////////////////////////////////////////
1673
-
1674
- Transducer *rbt=pi_machine(TheAlphabet);
1675
- root = rbt->root_node();
1676
- root->add_arc( Label(leftm, Label::epsilon), root, rbt);
1677
- root->add_arc( Label(rightm,Label::epsilon), root, rbt);
1678
-
1679
- // Add the markers to the alphabet
1680
- TheAlphabet.insert(Label(leftm));
1681
- TheAlphabet.insert(Label(rightm));
1682
-
1683
- /////////////////////////////////////////////////////////////
1684
- // Create the constrain boundaries transducer !(.*<L><R>.*)
1685
- /////////////////////////////////////////////////////////////
1686
-
1687
- Transducer *cbt=constrain_boundary_transducer(leftm, rightm);
1688
-
1689
- /////////////////////////////////////////////////////////////
1690
- // Create the extended context transducers
1691
- /////////////////////////////////////////////////////////////
1692
-
1693
- // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1694
- Transducer *lct = left_context(c->left, leftm, rightm);
1695
-
1696
- // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1697
- Transducer *tmp = &c->right->reverse();
1698
- delete c->right;
1699
- Transducer *t2 = left_context(tmp, rightm, leftm);
1700
- Transducer *rct = &t2->reverse();
1701
- delete t2;
1702
-
1703
- /////////////////////////////////////////////////////////////
1704
- // unconditional replace transducer
1705
- /////////////////////////////////////////////////////////////
1706
-
1707
- Transducer *rt;
1708
- if (type == repl_up || type == repl_right || type == repl_left)
1709
- rt = replace_transducer( t, leftm, rightm, repl_up );
1710
- else
1711
- rt = replace_transducer( t, leftm, rightm, repl_down );
1712
-
1713
- /////////////////////////////////////////////////////////////
1714
- // build the conditional replacement transducer
1715
- /////////////////////////////////////////////////////////////
1716
-
1717
- tmp = &(ibt->copy());
1718
- tmp = &(cbt->copy());
1719
- tmp = &(lct->copy());
1720
- tmp = &(rct->copy());
1721
- tmp = &(rt->copy());
1722
- tmp = &(rbt->copy());
1723
-
1724
- tmp = ibt;
1725
- tmp = &(*ibt || *cbt);
1726
- delete(ibt);
1727
- delete(cbt);
1728
-
1729
- if (type == repl_up || type == repl_left) {
1730
- t2 = &(*tmp || *lct);
1731
- delete tmp;
1732
- delete lct;
1733
- tmp = t2;
1734
- }
1735
- if (type == repl_up || type == repl_right) {
1736
- t2 = &(*tmp || *rct);
1737
- delete tmp;
1738
- delete rct;
1739
- tmp = t2;
1740
- }
1741
-
1742
- t2 = &(*tmp || *rt);
1743
- delete tmp;
1744
- delete rt;
1745
- tmp = t2;
1746
-
1747
- if (type == repl_down || type == repl_right) {
1748
- t2 = &(*tmp || *lct);
1749
- delete tmp;
1750
- delete lct;
1751
- tmp = t2;
1752
- }
1753
- if (type == repl_down || type == repl_left) {
1754
- t2 = &(*tmp || *rct);
1755
- delete tmp;
1756
- delete rct;
1757
- tmp = t2;
1758
- }
1759
-
1760
- t2 = &(*tmp || *rbt);
1761
- delete tmp;
1762
- delete rbt;
1763
-
1764
- // Remove the markers from the alphabet
1765
- TheAlphabet.delete_markers();
1766
-
1767
- if (optional)
1768
- t2 = make_optional(t2);
1769
-
1770
- free_contexts( c );
1771
-
1772
- return t2;
1773
- }
1774
-
1775
-
1776
- /*******************************************************************/
1777
- /* */
1778
- /* add_alphabet */
1779
- /* */
1780
- /*******************************************************************/
1781
-
1782
- void add_alphabet( Transducer *t )
1783
-
1784
- {
1785
- t->alphabet.copy(TheAlphabet);
1786
- t->complete_alphabet();
1787
- }
1788
-
1789
-
1790
- /*******************************************************************/
1791
- /* */
1792
- /* write_to_file */
1793
- /* */
1794
- /*******************************************************************/
1795
-
1796
- void write_to_file( Transducer *t, char *filename)
1797
-
1798
- {
1799
- FILE *file;
1800
- if ((file = fopen(filename,"wb")) == NULL) {
1801
- fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
1802
- exit(1);
1803
- }
1804
- free( filename );
1805
-
1806
- t = explode(t);
1807
- add_alphabet(t);
1808
- t = minimise(t);
1809
- t->store(file);
1810
- fclose(file);
1811
- }
1812
-
1813
-
1814
- /*******************************************************************/
1815
- /* */
1816
- /* result */
1817
- /* */
1818
- /*******************************************************************/
1819
-
1820
- Transducer *result( Transducer *t, bool switch_flag )
1821
-
1822
- {
1823
- t = explode(t);
1824
-
1825
- // delete the variable values
1826
- vector<char*> s;
1827
- for( VarMap::iterator it=VM.begin(); it != VM.end(); it++ ) {
1828
- s.push_back(it->first);
1829
- delete it->second;
1830
- it->second = NULL;
1831
- }
1832
- VM.clear();
1833
- for( size_t i=0; i<s.size(); i++ )
1834
- free(s[i]);
1835
- s.clear();
1836
-
1837
- if (switch_flag)
1838
- t = switch_levels(t);
1839
- add_alphabet(t);
1840
- t = minimise(t);
1841
- return t;
1842
- }