ruby-sfst 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/COPYING +280 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +54 -0
- data/README.md +1 -1
- data/Rakefile +9 -18
- data/bin/console +7 -0
- data/bin/setup +6 -0
- data/ext/sfst/alphabet.cc +879 -0
- data/ext/sfst/alphabet.h +302 -0
- data/ext/sfst/basic.cc +85 -0
- data/ext/{sfst_machine → sfst}/basic.h +7 -4
- data/ext/sfst/compact.cc +629 -0
- data/ext/sfst/compact.h +100 -0
- data/ext/sfst/determinise.cc +279 -0
- data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
- data/ext/sfst/fst.cc +1150 -0
- data/ext/sfst/fst.h +374 -0
- data/ext/sfst/hopcroft.cc +681 -0
- data/ext/sfst/interface.cc +1921 -0
- data/ext/sfst/interface.h +171 -0
- data/ext/sfst/make-compact.cc +323 -0
- data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
- data/ext/sfst/mem.h +80 -0
- data/ext/sfst/operators.cc +1273 -0
- data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
- data/ext/sfst/sgi.h +72 -0
- data/ext/sfst/utf8.cc +149 -0
- data/ext/{sfst_machine → sfst}/utf8.h +7 -4
- data/lib/sfst.rb +2 -1
- data/lib/sfst/version.rb +1 -1
- data/ruby-sfst.gemspec +23 -23
- metadata +107 -35
- data/ext/sfst_machine/alphabet.cc +0 -812
- data/ext/sfst_machine/alphabet.h +0 -273
- data/ext/sfst_machine/basic.cc +0 -84
- data/ext/sfst_machine/compact.cc +0 -616
- data/ext/sfst_machine/compact.h +0 -98
- data/ext/sfst_machine/determinise.cc +0 -303
- data/ext/sfst_machine/fst.cc +0 -1000
- data/ext/sfst_machine/fst.h +0 -369
- data/ext/sfst_machine/interface.cc +0 -1842
- data/ext/sfst_machine/interface.h +0 -93
- data/ext/sfst_machine/make-compact.cc +0 -327
- data/ext/sfst_machine/mem.h +0 -74
- data/ext/sfst_machine/operators.cc +0 -1131
- data/ext/sfst_machine/sgi.h +0 -44
- data/ext/sfst_machine/utf8.cc +0 -146
- data/test/test_sfst.fst +0 -3
- data/test/test_sfst.rb +0 -114
data/ext/sfst_machine/fst.h
DELETED
@@ -1,369 +0,0 @@
|
|
1
|
-
/*******************************************************************/
|
2
|
-
/* */
|
3
|
-
/* FILE fst.h */
|
4
|
-
/* MODULE fst */
|
5
|
-
/* PROGRAM SFST */
|
6
|
-
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
7
|
-
/* */
|
8
|
-
/* PURPOSE finite state tools */
|
9
|
-
/* */
|
10
|
-
/*******************************************************************/
|
11
|
-
|
12
|
-
#ifndef _FST_H_
|
13
|
-
#define _FST_H_
|
14
|
-
|
15
|
-
#include "alphabet.h"
|
16
|
-
|
17
|
-
|
18
|
-
/*******************************************************************/
|
19
|
-
/* include commands */
|
20
|
-
/*******************************************************************/
|
21
|
-
|
22
|
-
#include <string>
|
23
|
-
|
24
|
-
#include <vector>
|
25
|
-
|
26
|
-
#include "mem.h"
|
27
|
-
|
28
|
-
// Integer type of the visit marks stored in Node::visited and
// Transducer::vmark.
typedef unsigned short VType;

// Only declared here; the definition is not part of this header.
extern int Quiet;

// Forward declarations of the classes defined further down.
class Arc;
class Arcs;
class Node;
class Transducer;
|
36
|
-
|
37
|
-
|
38
|
-
struct hashf {
|
39
|
-
size_t operator()(const Node *n) const { return (size_t) n; }
|
40
|
-
};
|
41
|
-
struct equalf {
|
42
|
-
int operator()(const Node *n1, const Node *n2) const { return n1==n2; }
|
43
|
-
};
|
44
|
-
// Hash set of Node pointers, hashed and compared by address (hashf/equalf).
typedef hash_set<Node*, hashf, equalf> NodeHashSet;
|
45
|
-
|
46
|
-
|
47
|
-
/***************** class Arc *************************************/
|
48
|
-
|
49
|
-
class Arc {
|
50
|
-
|
51
|
-
private:
|
52
|
-
Label l;
|
53
|
-
Node *target;
|
54
|
-
Arc *next;
|
55
|
-
|
56
|
-
public:
|
57
|
-
void init( Label ll, Node *node ) { l=ll; target=node; };
|
58
|
-
Label label( void ) const { return l; };
|
59
|
-
Node *target_node( void ) { return target; };
|
60
|
-
const Node *target_node( void ) const { return target; };
|
61
|
-
|
62
|
-
friend class Arcs;
|
63
|
-
friend class ArcsIter;
|
64
|
-
};
|
65
|
-
|
66
|
-
|
67
|
-
/***************** class Arcs ************************************/
|
68
|
-
|
69
|
-
class Arcs {
|
70
|
-
|
71
|
-
private:
|
72
|
-
Arc *first_arcp;
|
73
|
-
Arc *first_epsilon_arcp;
|
74
|
-
|
75
|
-
public:
|
76
|
-
void init( void ) { first_arcp = first_epsilon_arcp = NULL; };
|
77
|
-
Arcs( void ) { init(); };
|
78
|
-
Node *target_node( Label l );
|
79
|
-
const Node *target_node( Label l ) const;
|
80
|
-
void add_arc( Label, Node*, Transducer* );
|
81
|
-
int remove_arc( Arc* );
|
82
|
-
bool is_empty( void ) const { return !(first_arcp || first_epsilon_arcp); };
|
83
|
-
bool epsilon_transition_exists( void ) const { return first_epsilon_arcp != NULL; };
|
84
|
-
bool non_epsilon_transition_exists( void ) const { return first_arcp != NULL; };
|
85
|
-
int size( void ) const;
|
86
|
-
|
87
|
-
friend class ArcsIter;
|
88
|
-
};
|
89
|
-
|
90
|
-
|
91
|
-
/***************** class ArcsIter ********************************/
|
92
|
-
|
93
|
-
class ArcsIter {
|
94
|
-
|
95
|
-
// ArcsIter iterates over the arcs starting with epsilon arcs
|
96
|
-
|
97
|
-
private:
|
98
|
-
Arc *current_arcp;
|
99
|
-
Arc *more_arcs;
|
100
|
-
|
101
|
-
public:
|
102
|
-
typedef enum {all,non_eps,eps} IterType;
|
103
|
-
|
104
|
-
ArcsIter( const Arcs *arcs, IterType type=all ) {
|
105
|
-
more_arcs = NULL;
|
106
|
-
if (type == all) {
|
107
|
-
if (arcs->first_epsilon_arcp) {
|
108
|
-
current_arcp = arcs->first_epsilon_arcp;
|
109
|
-
more_arcs = arcs->first_arcp;
|
110
|
-
}
|
111
|
-
else
|
112
|
-
current_arcp = arcs->first_arcp;
|
113
|
-
}
|
114
|
-
else if (type == non_eps)
|
115
|
-
current_arcp = arcs->first_arcp;
|
116
|
-
else
|
117
|
-
current_arcp = arcs->first_epsilon_arcp;
|
118
|
-
};
|
119
|
-
|
120
|
-
void operator++( int ) {
|
121
|
-
if (current_arcp) {
|
122
|
-
current_arcp = current_arcp->next;
|
123
|
-
if (!current_arcp && more_arcs) {
|
124
|
-
current_arcp = more_arcs;
|
125
|
-
more_arcs = NULL;
|
126
|
-
}
|
127
|
-
}
|
128
|
-
};
|
129
|
-
operator Arc*( void ) { return current_arcp; };
|
130
|
-
|
131
|
-
};
|
132
|
-
|
133
|
-
|
134
|
-
/***************** class Node ************************************/
|
135
|
-
|
136
|
-
class Node {
|
137
|
-
|
138
|
-
private:
|
139
|
-
bool final;
|
140
|
-
VType visited;
|
141
|
-
Arcs arcsp;
|
142
|
-
Node *forwardp;
|
143
|
-
|
144
|
-
public:
|
145
|
-
Node( void ) { init(); };
|
146
|
-
void init( void );
|
147
|
-
bool is_final( void ) const { return final; };
|
148
|
-
void set_final( bool flag ) { final = flag; };
|
149
|
-
void set_forward( Node *node ) { forwardp = node; };
|
150
|
-
const Node *target_node( Label l ) const { return arcs()->target_node(l); };
|
151
|
-
Node *target_node( Label l ) { return arcs()->target_node(l); };
|
152
|
-
void add_arc( Label l, Node *n, Transducer *a ) { arcs()->add_arc(l, n, a); };
|
153
|
-
Arcs *arcs( void ) { return &arcsp; };
|
154
|
-
const Arcs *arcs( void ) const { return &arcsp; };
|
155
|
-
Node *forward( void ) { return forwardp; };
|
156
|
-
void clear_visited( NodeHashSet &nodeset );
|
157
|
-
bool was_visited( VType vmark ) {
|
158
|
-
if (visited == vmark)
|
159
|
-
return true;
|
160
|
-
visited = vmark;
|
161
|
-
return false;
|
162
|
-
};
|
163
|
-
bool check_visited( VType vm ) // leaves the visited flag unchanged
|
164
|
-
{ return (visited==vm); };
|
165
|
-
};
|
166
|
-
|
167
|
-
|
168
|
-
/***************** class Node2Int *********************************/
|
169
|
-
|
170
|
-
class Node2Int {
|
171
|
-
|
172
|
-
struct hashf {
|
173
|
-
size_t operator()(const Node *node) const {
|
174
|
-
return (size_t)node;
|
175
|
-
}
|
176
|
-
};
|
177
|
-
struct equalf {
|
178
|
-
int operator()(const Node *n1, const Node *n2) const {
|
179
|
-
return (n1 == n2);
|
180
|
-
}
|
181
|
-
};
|
182
|
-
typedef hash_map<Node*, int, hashf, equalf> NL;
|
183
|
-
|
184
|
-
private:
|
185
|
-
int current_number;
|
186
|
-
NL number;
|
187
|
-
|
188
|
-
public:
|
189
|
-
int &operator[]( Node *node ) {
|
190
|
-
NL::iterator it=number.find(node);
|
191
|
-
if (it == number.end())
|
192
|
-
return number.insert(NL::value_type(node, 0)).first->second;
|
193
|
-
return it->second;
|
194
|
-
};
|
195
|
-
};
|
196
|
-
|
197
|
-
|
198
|
-
/***************** class NodeNumbering ****************************/
|
199
|
-
|
200
|
-
class NodeNumbering {
|
201
|
-
|
202
|
-
private:
|
203
|
-
std::vector<Node*> nodes;
|
204
|
-
Node2Int nummap;
|
205
|
-
void number_node( Node*, Transducer& );
|
206
|
-
|
207
|
-
public:
|
208
|
-
NodeNumbering( Transducer& );
|
209
|
-
int operator[]( Node *node ) { return nummap[node]; };
|
210
|
-
size_t number_of_nodes( void ) { return nodes.size(); };
|
211
|
-
Node *get_node( size_t n ) { return nodes[n]; };
|
212
|
-
};
|
213
|
-
|
214
|
-
|
215
|
-
/***************** class PairMapping ****************************/
|
216
|
-
|
217
|
-
class PairMapping {
|
218
|
-
// This class is used to map a node pair from two transducers
|
219
|
-
// to a single node in another transducer
|
220
|
-
|
221
|
-
typedef std::pair<Node*, Node*> NodePair;
|
222
|
-
|
223
|
-
private:
|
224
|
-
struct hashf {
|
225
|
-
size_t operator()(const NodePair p) const {
|
226
|
-
return (size_t)p.first ^ (size_t)p.second;
|
227
|
-
}
|
228
|
-
};
|
229
|
-
struct equalf {
|
230
|
-
int operator()(const NodePair p1, const NodePair p2) const {
|
231
|
-
return (p1.first==p2.first && p1.second == p2.second);
|
232
|
-
}
|
233
|
-
};
|
234
|
-
typedef hash_map<NodePair, Node*, hashf, equalf> PairMap;
|
235
|
-
PairMap pm;
|
236
|
-
|
237
|
-
public:
|
238
|
-
typedef PairMap::iterator iterator;
|
239
|
-
iterator begin( void ) { return pm.begin(); };
|
240
|
-
iterator end( void ) { return pm.end(); };
|
241
|
-
iterator find( Node *n1, Node *n2 )
|
242
|
-
{ return pm.find( NodePair(n1,n2) ); };
|
243
|
-
Node* &operator[]( NodePair p ) { return pm.operator[](p); };
|
244
|
-
|
245
|
-
};
|
246
|
-
|
247
|
-
|
248
|
-
/***************** class Transducer *******************************/
|
249
|
-
|
250
|
-
class Transducer {
|
251
|
-
|
252
|
-
private:
|
253
|
-
bool deterministic;
|
254
|
-
bool minimised;
|
255
|
-
Node root;
|
256
|
-
Mem mem;
|
257
|
-
|
258
|
-
typedef std::set<Label, Label::label_cmp> LabelSet;
|
259
|
-
typedef hash_map<Character, char*> SymbolMap;
|
260
|
-
|
261
|
-
void incr_vmark( void ) {
|
262
|
-
if (++vmark == 0) {
|
263
|
-
NodeHashSet nodes;
|
264
|
-
root.clear_visited( nodes );
|
265
|
-
fprintf(stderr,"clearing flags\n");
|
266
|
-
vmark = 1;
|
267
|
-
}
|
268
|
-
};
|
269
|
-
void reverse_node( Node *old_node, Transducer *new_node );
|
270
|
-
Label recode_label( Label, bool lswitch, bool recode, Alphabet& );
|
271
|
-
Node *copy_nodes( Node *n, Transducer *a,
|
272
|
-
bool lswitch=false, bool recode=false );
|
273
|
-
void rec_cat_nodes( Node*, Node* );
|
274
|
-
bool productive_node( Node* );
|
275
|
-
bool prune_nodes( Node* );
|
276
|
-
void negate_nodes( Node*, Node* );
|
277
|
-
bool compare_nodes( Node *node, Node *node2, Transducer &a2 );
|
278
|
-
void map_nodes( Node *node, Node *node2, Transducer *a, Level level );
|
279
|
-
void freely_insert_at_node( Node *node, Label l );
|
280
|
-
int print_strings_node(Node *node, char *buffer, int pos, FILE *file, bool);
|
281
|
-
bool infinitely_ambiguous_node( Node* );
|
282
|
-
bool is_cyclic_node( Node*, NodeHashSet &visited );
|
283
|
-
bool is_automaton_node( Node* );
|
284
|
-
bool generate1( Node*, Node2Int&, char*, int, char*, int, FILE* );
|
285
|
-
void store_symbols( Node*, SymbolMap&, LabelSet& );
|
286
|
-
|
287
|
-
void splice_nodes(Node*, Node*, Label sl, Transducer*, Transducer*);
|
288
|
-
void splice_arc( Node*, Node*, Node*, Transducer* );
|
289
|
-
void enumerate_paths_node( Node*, std::vector<Label>&, NodeHashSet&,
|
290
|
-
std::vector<Transducer*>& );
|
291
|
-
void replace_char2( Node*, Node*, Character, Character, Transducer* );
|
292
|
-
Node *create_node( std::vector<Node*>&, char*, size_t line );
|
293
|
-
void read_transducer_binary( FILE* );
|
294
|
-
void read_transducer_text( FILE* );
|
295
|
-
|
296
|
-
public:
|
297
|
-
VType vmark;
|
298
|
-
Alphabet alphabet; // The set of all labels, i.e. character pairs
|
299
|
-
|
300
|
-
Transducer( void ) : root(), mem()
|
301
|
-
{ vmark = 0; deterministic = minimised = false; };
|
302
|
-
// convertion of a string to an transducer
|
303
|
-
Transducer( char *s, const Alphabet *a=NULL, bool extended=false );
|
304
|
-
// reads a word list from a file and stores it in the transducer
|
305
|
-
Transducer( std::istream&, const Alphabet *a=NULL, bool verbose=false );
|
306
|
-
// reads a transducer from a binary or text file
|
307
|
-
Transducer( FILE*, bool binary=true );
|
308
|
-
// turns a sequence of labels into a transducer
|
309
|
-
Transducer( std::vector<Label>& );
|
310
|
-
|
311
|
-
Node *root_node( void ) { return &root; }; // returns the root node
|
312
|
-
const Node *root_node( void ) const { return &root; }; // returns the root node
|
313
|
-
Node *new_node( void ); // memory alocation for a new node
|
314
|
-
Arc *new_arc( Label l, Node *target ); // memory alocation for a new arc
|
315
|
-
void add_string( char *s, bool extended=false, Alphabet *a=NULL );
|
316
|
-
void complete_alphabet( void );
|
317
|
-
void minimise_alphabet( void );
|
318
|
-
void prune( void ); // remove unnecessary arcs
|
319
|
-
|
320
|
-
int print_strings( FILE*, bool with_brackets=true ); //enumerate all strings
|
321
|
-
|
322
|
-
bool analyze_string( char *s, FILE *file, bool with_brackets=true );
|
323
|
-
bool generate_string( char *s, FILE *file, bool with_brackets=true );
|
324
|
-
bool generate( FILE *file, bool separate=false );
|
325
|
-
|
326
|
-
void clear( void ); // clears the transducer. The resulting transducer
|
327
|
-
// is like one created with Transducer()
|
328
|
-
// copy duplicates an transducer
|
329
|
-
// if called with a non-zero argument, upper and lower level are switched
|
330
|
-
Transducer ©( bool lswitch=false, const Alphabet *al=NULL );
|
331
|
-
Transducer &switch_levels( void ) { return copy( true ); };
|
332
|
-
Transducer &splice( Label l, Transducer *a);
|
333
|
-
Transducer &freely_insert( Label l );
|
334
|
-
Transducer &replace_char( Character c, Character nc );
|
335
|
-
Transducer &level( Level );
|
336
|
-
Transducer &lower_level( void ) // creates an transducer for the "lower" language
|
337
|
-
{ return level(lower); };
|
338
|
-
Transducer &upper_level( void ) // creates an transducer for the "upper" language
|
339
|
-
{ return level(upper); };
|
340
|
-
Transducer &determinise( void ); // creates a deterministic transducer
|
341
|
-
Transducer &minimise( bool verbose=true ); // creates a minimised transducer
|
342
|
-
void store( FILE* ); // stores the transducer in binary format
|
343
|
-
void store_lowmem( FILE* );
|
344
|
-
void read( FILE* ); // reads an transducer in binary format
|
345
|
-
bool enumerate_paths( std::vector<Transducer*>& );
|
346
|
-
|
347
|
-
Transducer &reverse( void ); // reverse language
|
348
|
-
Transducer &operator|( Transducer& ); // union, disjunction
|
349
|
-
Transducer &operator+( Transducer& ); // concatenation
|
350
|
-
Transducer &operator/( Transducer& ); // subtraction
|
351
|
-
Transducer &operator&( Transducer& ); // intersection, conjunction
|
352
|
-
Transducer &operator||( Transducer& ); // composition
|
353
|
-
Transducer &operator!( void ); // complement, negation
|
354
|
-
Transducer &kleene_star( void );
|
355
|
-
bool operator==( Transducer& ); // minimises its arguments first
|
356
|
-
|
357
|
-
bool is_cyclic( void );
|
358
|
-
bool is_automaton( void );
|
359
|
-
bool is_infinitely_ambiguous( void );
|
360
|
-
bool is_empty( void ); // For efficiency reasons, these functions
|
361
|
-
bool generates_empty_string( void );// are better called after minimisation
|
362
|
-
|
363
|
-
friend class NodeNumbering;
|
364
|
-
friend class EdgeCount;
|
365
|
-
friend class MakeCompactTransducer;
|
366
|
-
friend std::ostream &operator<<(std::ostream&, Transducer&);
|
367
|
-
};
|
368
|
-
|
369
|
-
#endif
|
@@ -1,1842 +0,0 @@
|
|
1
|
-
/*******************************************************************/
|
2
|
-
/* */
|
3
|
-
/* FILE interface.C */
|
4
|
-
/* MODULE interface */
|
5
|
-
/* PROGRAM SFST */
|
6
|
-
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
7
|
-
/* */
|
8
|
-
/*******************************************************************/
|
9
|
-
|
10
|
-
#include "interface.h"
|
11
|
-
|
12
|
-
#include <fstream>
|
13
|
-
using std::ifstream;
|
14
|
-
using std::ofstream;
|
15
|
-
|
16
|
-
#include <set>
|
17
|
-
using std::set;
|
18
|
-
|
19
|
-
#include "sgi.h"
|
20
|
-
|
21
|
-
using std::cerr;
|
22
|
-
using std::cout;
|
23
|
-
using std::vector;
|
24
|
-
|
25
|
-
// Strict weak ordering on C strings: compares the contents, not the pointers.
struct ltstr {
  bool operator()(const char *s1, const char *s2) const
  {
    const int order = strcmp(s1, s2);
    return order < 0;
  }
};
|
29
|
-
|
30
|
-
// Equality predicate on C strings: true when both hold the same characters.
struct eqstr {
  bool operator()(const char *s1, const char *s2) const
  {
    const int diff = strcmp(s1, s2);
    return diff == 0;
  }
};
|
34
|
-
|
35
|
-
typedef set<char*, ltstr> RVarSet;
|
36
|
-
|
37
|
-
typedef hash_map<char*, Transducer*, hash<const char*>, eqstr> VarMap;
|
38
|
-
|
39
|
-
typedef hash_map<char*, Range*, hash<const char*>, eqstr> SVarMap;
|
40
|
-
|
41
|
-
bool Verbose=false;
|
42
|
-
|
43
|
-
Alphabet TheAlphabet;
|
44
|
-
|
45
|
-
static VarMap VM;
|
46
|
-
static SVarMap SVM;
|
47
|
-
static RVarSet RS;
|
48
|
-
static RVarSet RSS;
|
49
|
-
static int Alphabet_Defined=0;
|
50
|
-
|
51
|
-
|
52
|
-
/*******************************************************************/
|
53
|
-
/* */
|
54
|
-
/* error */
|
55
|
-
/* */
|
56
|
-
/*******************************************************************/
|
57
|
-
|
58
|
-
static void error( const char *message )
|
59
|
-
|
60
|
-
{
|
61
|
-
cerr << "\nError: " << message << "\naborted.\n";
|
62
|
-
exit(1);
|
63
|
-
}
|
64
|
-
|
65
|
-
|
66
|
-
/*******************************************************************/
|
67
|
-
/* */
|
68
|
-
/* error2 */
|
69
|
-
/* */
|
70
|
-
/*******************************************************************/
|
71
|
-
|
72
|
-
void error2( char *message, char *input )
|
73
|
-
|
74
|
-
{
|
75
|
-
cerr << "\nError: " << message << ": " << input << "\naborted.\n";
|
76
|
-
exit(1);
|
77
|
-
}
|
78
|
-
|
79
|
-
|
80
|
-
/*******************************************************************/
|
81
|
-
/* */
|
82
|
-
/* symbol_code */
|
83
|
-
/* */
|
84
|
-
/*******************************************************************/
|
85
|
-
|
86
|
-
// Returns the code of `symbol` in TheAlphabet, adding the symbol first when
// the lookup reports EOF. The string is released with free() before
// returning, so the caller must not use it afterwards.
Character symbol_code( char *symbol )
{
  const int known = TheAlphabet.symbol2code(symbol);
  int code = known;
  if (known == EOF)
    code = TheAlphabet.add_symbol( symbol );

  free(symbol);
  return static_cast<Character>(code);
}
|
95
|
-
|
96
|
-
|
97
|
-
/*******************************************************************/
|
98
|
-
/* */
|
99
|
-
/* character_code */
|
100
|
-
/* */
|
101
|
-
/*******************************************************************/
|
102
|
-
|
103
|
-
// Returns the symbol code for the character `uc`.
// In UTF-8 mode the symbol is the result of int2utf8(uc); otherwise it is
// the one-character string holding the low byte of `uc`. The temporary
// string is handed to symbol_code(), which frees it.
Character character_code( unsigned int uc )
{
  if (TheAlphabet.utf8)
    return symbol_code(fst_strdup(int2utf8(uc)));

  unsigned char *buffer=(unsigned char*)malloc(2);
  // malloc may fail; writing through a NULL pointer would be undefined
  if (buffer == NULL)
    error("out of memory");
  buffer[0] = (unsigned char)uc;
  buffer[1] = 0;
  return symbol_code((char*)buffer);
}
|
114
|
-
|
115
|
-
|
116
|
-
/*******************************************************************/
|
117
|
-
/* */
|
118
|
-
/* add_value */
|
119
|
-
/* */
|
120
|
-
/*******************************************************************/
|
121
|
-
|
122
|
-
// Allocates a new list node holding `c` and puts it in front of `r`.
// Returns the new head of the list.
Range *add_value( Character c, Range *r )
{
  Range *node = new Range;
  node->next = r;
  node->character = c;
  return node;
}
|
130
|
-
|
131
|
-
|
132
|
-
/*******************************************************************/
|
133
|
-
/* */
|
134
|
-
/* add_values */
|
135
|
-
/* */
|
136
|
-
/*******************************************************************/
|
137
|
-
|
138
|
-
// Prepends the codes of all characters in the closed interval [c1,c2] to
// `r`, so that the resulting list starts with c1. Returns `r` unchanged when
// c2 < c1.
Range *add_values( unsigned int c1, unsigned int c2, Range *r )
{
  if (c2 < c1)
    return r;

  // Count down from c2 and stop explicitly at c1. A loop condition of the
  // form "c >= c1" can never become false for c1 == 0 because the unsigned
  // counter wraps around instead of going negative.
  for( unsigned int c=c2; ; c-- ) {
    r = add_value(character_code(c), r);
    if (c == c1)
      break;
  }
  return r;
}
|
145
|
-
|
146
|
-
|
147
|
-
/*******************************************************************/
|
148
|
-
/* */
|
149
|
-
/* append_values */
|
150
|
-
/* */
|
151
|
-
/*******************************************************************/
|
152
|
-
|
153
|
-
// Returns a list consisting of copies of the elements of `r2` (in the same
// order) followed by the list `r` itself. `r2` is neither modified nor freed.
Range *append_values( Range *r2, Range *r )
{
  Range *head = r;
  Range **slot = &head;  // where the next copied node has to be linked in

  for( const Range *p=r2; p != NULL; p=p->next ) {
    // every new node points to r until a successor replaces that link
    *slot = add_value(p->character, r);
    slot = &(*slot)->next;
  }
  return head;
}
|
160
|
-
|
161
|
-
|
162
|
-
/*******************************************************************/
|
163
|
-
/* */
|
164
|
-
/* add_var_values */
|
165
|
-
/* */
|
166
|
-
/*******************************************************************/
|
167
|
-
|
168
|
-
// Puts the values of the range variable `name` in front of the list `r` and
// returns the combined list. `name` is consumed by svar_value().
Range *add_var_values( char *name, Range *r )
{
  // svar_value() already returns a fresh copy of the variable's list.
  // Splice that copy in directly instead of copying it a second time and
  // leaving the first copy unreferenced.
  Range *head = svar_value(name);
  if (head == NULL)
    return r;

  Range *tail = head;
  while (tail->next != NULL)
    tail = tail->next;
  tail->next = r;
  return head;
}
|
173
|
-
|
174
|
-
|
175
|
-
/*******************************************************************/
|
176
|
-
/* */
|
177
|
-
/* in_range */
|
178
|
-
/* */
|
179
|
-
/*******************************************************************/
|
180
|
-
|
181
|
-
static bool in_range( unsigned int c, Range *r )
|
182
|
-
|
183
|
-
{
|
184
|
-
while (r) {
|
185
|
-
if (r->character == c)
|
186
|
-
return true;
|
187
|
-
r = r->next;
|
188
|
-
}
|
189
|
-
return false;
|
190
|
-
}
|
191
|
-
|
192
|
-
|
193
|
-
/*******************************************************************/
|
194
|
-
/* */
|
195
|
-
/* free_values */
|
196
|
-
/* */
|
197
|
-
/*******************************************************************/
|
198
|
-
|
199
|
-
// Deletes every node of the list `r`. NULL is accepted and ignored.
static void free_values( Range *r )
{
  while (r != NULL) {
    Range *rest = r->next;  // fetch the successor before the node goes away
    delete r;
    r = rest;
  }
}
|
207
|
-
|
208
|
-
|
209
|
-
/*******************************************************************/
|
210
|
-
/* */
|
211
|
-
/* free_values */
|
212
|
-
/* */
|
213
|
-
/*******************************************************************/
|
214
|
-
|
215
|
-
// Deletes every node of the Ranges list `r`, last node first.
// NULL is accepted and ignored.
static void free_values( Ranges *r )
{
  if (r == NULL)
    return;

  Ranges *rest = r->next;
  free_values(rest);
  delete r;
}
|
223
|
-
|
224
|
-
|
225
|
-
/*******************************************************************/
|
226
|
-
/* */
|
227
|
-
/* free_contexts */
|
228
|
-
/* */
|
229
|
-
/*******************************************************************/
|
230
|
-
|
231
|
-
// Deletes every node of the Contexts list `c`, last node first.
// NULL is accepted and ignored.
static void free_contexts( Contexts *c )
{
  if (c == NULL)
    return;

  Contexts *rest = c->next;
  free_contexts(rest);
  delete c;
}
|
239
|
-
|
240
|
-
|
241
|
-
/*******************************************************************/
|
242
|
-
/* */
|
243
|
-
/* copy_values */
|
244
|
-
/* */
|
245
|
-
/*******************************************************************/
|
246
|
-
|
247
|
-
// Returns a newly allocated list with the same characters, in the same
// order, as `r`. The copy of an empty list is NULL.
static Range *copy_values( const Range *r )
{
  Range *head = NULL;
  Range **slot = &head;  // where the next copied node has to be linked in

  for( ; r != NULL; r=r->next ) {
    *slot = add_value( r->character, NULL );
    slot = &(*slot)->next;
  }
  return head;
}
|
254
|
-
|
255
|
-
|
256
|
-
/*******************************************************************/
|
257
|
-
/* */
|
258
|
-
/* complement_range */
|
259
|
-
/* */
|
260
|
-
/*******************************************************************/
|
261
|
-
|
262
|
-
// Replaces the range `r` by its complement as computed by
// TheAlphabet.complement(). The argument list is freed; the result is a new
// list. Aborts via error() when the complement is empty.
Range *complement_range( Range *r )
{
  // collect the characters of the range, then release the list
  vector<Character> sym;
  Range *p = r;
  while (p) {
    sym.push_back( p->character );
    p = p->next;
  }
  free_values( r );

  TheAlphabet.complement(sym);
  if (sym.empty())
    error("Empty character range!");

  // build the result by prepending, i.e. in reverse order of `sym`
  Range *result=NULL;
  for( vector<Character>::const_iterator it=sym.begin(); it!=sym.end(); ++it ) {
    Range *node = new Range;
    node->character = *it;
    node->next = result;
    result = node;
  }

  return result;
}
|
285
|
-
|
286
|
-
|
287
|
-
/*******************************************************************/
|
288
|
-
/* */
|
289
|
-
/* make_transducer */
|
290
|
-
/* */
|
291
|
-
/*******************************************************************/
|
292
|
-
|
293
|
-
// Builds a transducer with one arc per label from the root to a single
// final node.
// - If one of the ranges is NULL (the wildcard '.'), every label of
//   TheAlphabet whose lower character is in r1 and whose upper character is
//   in r2 is used, a NULL range matching everything. This requires that an
//   alphabet has been defined.
// - Otherwise the two lists are walked in parallel and paired element by
//   element; the shorter list keeps contributing its last element.
// The ranges are not freed here.
static Transducer *make_transducer( Range *r1, Range *r2 )
{
  Transducer *t=new Transducer();
  Node *node=t->new_node();
  node->set_final(1);

  const bool wildcard = (r1 == NULL || r2 == NULL);

  if (!wildcard) {
    while (true) {
      // TheAlphabet.insert(l);
      t->root_node()->add_arc( Label(r1->character, r2->character), node, t );

      const bool more1 = (r1->next != NULL);
      const bool more2 = (r2->next != NULL);
      if (!more1 && !more2)
        break;
      if (more1)
        r1 = r1->next;
      if (more2)
        r2 = r2->next;
    }
    return t;
  }

  if (!Alphabet_Defined)
    error("The wildcard symbol '.' requires the definition of an alphabet");

  // one of the ranges was '.'
  for(Alphabet::const_iterator it=TheAlphabet.begin();
      it!=TheAlphabet.end(); it++)
  {
    if (r1 != NULL && !in_range(it->lower_char(), r1))
      continue;
    if (r2 != NULL && !in_range(it->upper_char(), r2))
      continue;
    t->root_node()->add_arc( *it, node, t );
  }

  return t;
}
|
327
|
-
|
328
|
-
|
329
|
-
/*******************************************************************/
|
330
|
-
/* */
|
331
|
-
/* empty_transducer */
|
332
|
-
/* */
|
333
|
-
/*******************************************************************/
|
334
|
-
|
335
|
-
// Creates a transducer whose root node is final and has no arcs added.
static Transducer *empty_transducer()
{
  Transducer *result = new Transducer();
  Node *start = result->root_node();
  start->set_final(1);
  return result;
}
|
343
|
-
|
344
|
-
|
345
|
-
/*******************************************************************/
|
346
|
-
/* */
|
347
|
-
/* one_label_transducer */
|
348
|
-
/* */
|
349
|
-
/*******************************************************************/
|
350
|
-
|
351
|
-
// Creates a transducer consisting of a single arc labelled `l` that leads
// from the root node to a new final node.
static Transducer *one_label_transducer( Label l )
{
  Transducer *result = new Transducer();
  Node *start = result->root_node();
  Node *end = result->new_node();

  start->add_arc( l, end, result );
  end->set_final(1);
  return result;
}
|
361
|
-
|
362
|
-
|
363
|
-
/*******************************************************************/
|
364
|
-
/* */
|
365
|
-
/* new_transducer */
|
366
|
-
/* */
|
367
|
-
/*******************************************************************/
|
368
|
-
|
369
|
-
// Builds the transducer for the range pair r1:r2 and frees both ranges.
// When both arguments are the same list it is freed only once.
Transducer *new_transducer( Range *r1, Range *r2 )
{
  Transducer *result = make_transducer( r1, r2 );

  const bool same_list = (r1 == r2);
  if (!same_list)
    free_values(r1);
  free_values(r2);

  return result;
}
|
378
|
-
|
379
|
-
|
380
|
-
/*******************************************************************/
|
381
|
-
/* */
|
382
|
-
/* read_words */
|
383
|
-
/* */
|
384
|
-
/*******************************************************************/
|
385
|
-
|
386
|
-
// Reads a word list from the file `filename` into a new transducer and
// merges the transducer's symbols into TheAlphabet.
// `filename` is freed once the file has been opened successfully.
// Throws a char* message when the file cannot be opened; in that case
// `filename` is left untouched.
Transducer *read_words( char *filename )
{
  if (Verbose)
    fprintf(stderr,"\nreading words from %s...", filename);
  ifstream is(filename);
  if (!is.is_open()) {
    static char message[1000];
    // bounded formatting: a long file name must not overrun the buffer
    snprintf(message, sizeof(message),
             "Error: Cannot open file \"%s\"!", filename);
    throw message;
  }
  free( filename );
  Transducer *t = new Transducer(is, &TheAlphabet, Verbose);
  is.close();
  TheAlphabet.insert_symbols(t->alphabet);
  if (Verbose)
    fprintf(stderr,"finished\n");
  return t;
}
|
405
|
-
|
406
|
-
|
407
|
-
/*******************************************************************/
|
408
|
-
/* */
|
409
|
-
/* read_transducer */
|
410
|
-
/* */
|
411
|
-
/*******************************************************************/
|
412
|
-
|
413
|
-
// Reads a transducer from the file `filename`, recodes it with respect to
// TheAlphabet and merges its symbols into TheAlphabet.
// `filename` is freed once the file has been read and checked.
// Throws a char* message when the file cannot be opened or when its
// character encoding (utf8 flag) differs from that of TheAlphabet; in those
// cases `filename` is left untouched.
Transducer *read_transducer( char *filename )
{
  if (Verbose)
    fprintf(stderr,"\nreading transducer from %s...", filename);
  FILE *file = fopen(filename,"rb");
  if (file == NULL) {
    static char message[1000];
    // bounded formatting: a long file name must not overrun the buffer
    snprintf(message, sizeof(message),
             "Error: Cannot open file \"%s\"!", filename);
    throw message;
  }
  Transducer t(file);
  fclose(file);
  if (t.alphabet.utf8 != TheAlphabet.utf8) {
    static char message[1000];
    snprintf(message, sizeof(message),
             "Error: incompatible character encoding in file \"%s\"!",
             filename);
    throw message;
  }
  free( filename );
  Transducer *nt = &t.copy(false, &TheAlphabet);
  TheAlphabet.insert_symbols(nt->alphabet);
  if (Verbose)
    fprintf(stderr,"finished\n");
  return nt;
}
|
439
|
-
|
440
|
-
|
441
|
-
/*******************************************************************/
|
442
|
-
/* */
|
443
|
-
/* def_alphabet */
|
444
|
-
/* */
|
445
|
-
/*******************************************************************/
|
446
|
-
|
447
|
-
// Defines the global alphabet from the transducer `t`: the transducer is
// exploded and minimised, its character pairs are recomputed with
// complete_alphabet(), and the result replaces the character pairs of
// TheAlphabet. The transducer is deleted afterwards.
void def_alphabet( Transducer *t )
{
  t = minimise(explode(t));

  Alphabet &symbols = t->alphabet;
  symbols.clear_char_pairs();
  t->complete_alphabet();

  TheAlphabet.clear_char_pairs();
  TheAlphabet.copy(symbols);
  Alphabet_Defined = 1;

  delete t;
}
|
459
|
-
|
460
|
-
|
461
|
-
/*******************************************************************/
|
462
|
-
/* */
|
463
|
-
/* def_svar definition of a value range variable */
|
464
|
-
/* */
|
465
|
-
/*******************************************************************/
|
466
|
-
|
467
|
-
bool def_svar( char *name, Range *r )
|
468
|
-
|
469
|
-
{
|
470
|
-
// delete the old value of the variable
|
471
|
-
SVarMap::iterator it=SVM.find(name);
|
472
|
-
if (it != SVM.end()) {
|
473
|
-
char *n=it->first;
|
474
|
-
Range *v=it->second;
|
475
|
-
SVM.erase(it);
|
476
|
-
delete v;
|
477
|
-
free(n);
|
478
|
-
}
|
479
|
-
SVM[name] = r;
|
480
|
-
return r == NULL;
|
481
|
-
}
|
482
|
-
|
483
|
-
|
484
|
-
/*******************************************************************/
|
485
|
-
/* */
|
486
|
-
/* svar_value */
|
487
|
-
/* */
|
488
|
-
/*******************************************************************/
|
489
|
-
|
490
|
-
// Returns a copy of the value of the range variable `name`.
// Aborts via error2() when the variable is undefined. `name` is freed.
Range *svar_value( char *name )
{
  const SVarMap::iterator pos = SVM.find(name);
  const bool defined = (pos != SVM.end());
  if (!defined)
    error2("undefined variable", name);

  free(name);
  Range *duplicate = copy_values(pos->second);
  return duplicate;
}
|
499
|
-
|
500
|
-
|
501
|
-
/*******************************************************************/
|
502
|
-
/* */
|
503
|
-
/* rsvar_value */
|
504
|
-
/* */
|
505
|
-
/*******************************************************************/
|
506
|
-
|
507
|
-
// Records `name` in the set RSS (storing a duplicate of the string when it
// is new) and returns a one-element range holding the symbol code of
// `name`. The string itself is consumed by symbol_code().
Range *rsvar_value( char *name )
{
  const bool known = (RSS.find(name) != RSS.end());
  if (!known)
    RSS.insert(fst_strdup(name));

  const Character code = symbol_code(name);
  return add_value(code, NULL);
}
|
514
|
-
|
515
|
-
|
516
|
-
/*******************************************************************/
|
517
|
-
/* */
|
518
|
-
/* def_var definition of an transducer variable */
|
519
|
-
/* */
|
520
|
-
/*******************************************************************/
|
521
|
-
|
522
|
-
bool def_var( char *name, Transducer *t )
|
523
|
-
|
524
|
-
{
|
525
|
-
// delete the old value of the variable
|
526
|
-
VarMap::iterator it=VM.find(name);
|
527
|
-
if (it != VM.end()) {
|
528
|
-
char *n=it->first;
|
529
|
-
Transducer *v=it->second;
|
530
|
-
VM.erase(it);
|
531
|
-
delete v;
|
532
|
-
free(n);
|
533
|
-
}
|
534
|
-
|
535
|
-
t = explode(t);
|
536
|
-
t = minimise(t);
|
537
|
-
|
538
|
-
VM[name] = t;
|
539
|
-
return t->is_empty();
|
540
|
-
}
|
541
|
-
|
542
|
-
|
543
|
-
/*******************************************************************/
|
544
|
-
/* */
|
545
|
-
/* def_rvar definition of an agreement variable for automata */
|
546
|
-
/* */
|
547
|
-
/*******************************************************************/
|
548
|
-
|
549
|
-
bool def_rvar( char *name, Transducer *t )
|
550
|
-
|
551
|
-
{
|
552
|
-
if (t->is_cyclic())
|
553
|
-
error2("cyclic transducer assigned to", name);
|
554
|
-
return def_var( name, t );
|
555
|
-
}
|
556
|
-
|
557
|
-
|
558
|
-
/*******************************************************************/
|
559
|
-
/* */
|
560
|
-
/* var_value */
|
561
|
-
/* */
|
562
|
-
/*******************************************************************/
|
563
|
-
|
564
|
-
// Return a fresh copy of the transducer stored for variable `name`.
// An unknown name is reported through error2. The `name` buffer is freed
// here.
Transducer *var_value( char *name )
{
  VarMap::iterator entry=VM.find(name);
  if (entry == VM.end())
    error2("undefined variable", name);
  free(name);
  Transducer &duplicate=entry->second->copy();
  return &duplicate;
}
|
573
|
-
|
574
|
-
|
575
|
-
/*******************************************************************/
|
576
|
-
/* */
|
577
|
-
/* rvar_value */
|
578
|
-
/* */
|
579
|
-
/*******************************************************************/
|
580
|
-
|
581
|
-
// Register `name` in RS (storing a private copy of the string) if it is not
// there yet, then build a transducer from a one-element range holding the
// symbol code obtained for `name`, used for both arguments of
// new_transducer().
Transducer *rvar_value( char *name )
{
  RVarSet::iterator pos=RS.find(name);
  if (pos == RS.end())
    RS.insert(fst_strdup(name));
  Range *single=add_value(symbol_code(name), NULL);
  return new_transducer(single,single);
}
|
589
|
-
|
590
|
-
|
591
|
-
/*******************************************************************/
|
592
|
-
/* */
|
593
|
-
/* explode */
|
594
|
-
/* */
|
595
|
-
/*******************************************************************/
|
596
|
-
|
597
|
-
// Expand all pending agreement variables in `t`.
// For every name recorded in RS, each path of the variable's transducer is
// spliced into `t` at the variable's label and the results are unioned.
// For every name recorded in RSS, each character of the variable's range
// replaces the variable's symbol and the results are unioned.
// RS and RSS are emptied. Takes ownership of `t` and returns the expanded
// transducer (or `t` itself when nothing is pending).
// A variable with no paths / no characters yields an empty transducer
// instead of a null pointer.
Transducer *explode( Transducer *t )
{
  if (RS.size() == 0 && RSS.size() == 0)
    return t;

  t = minimise(t);

  vector<char*> name;
  for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
    name.push_back(*it);
  RS.clear();

  // replace all transducer agreement variables
  for( size_t i=0; i<name.size(); i++ ) {
    Transducer *nt = NULL;
    Label l(TheAlphabet.symbol2code(name[i]));
    Transducer *vt=var_value(name[i]); // also frees name[i]

    // enumerate all paths of the transducer
    vector<Transducer*> paths;
    vt->enumerate_paths(paths);
    delete vt;

    // insert each path
    for( size_t k=0; k<paths.size(); k++ ) {
      Transducer *t1 = &t->splice(l, paths[k]);
      delete paths[k];

      if (nt == NULL)
        nt = t1;
      else
        nt = disjunction(nt, t1);
    }
    // no path at all: the union of zero alternatives is the empty
    // transducer; never continue with a null pointer
    if (nt == NULL)
      nt = new Transducer();
    delete t;
    t = nt;
  }

  name.clear();
  for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
    name.push_back(*it);
  RSS.clear();

  // replace all range agreement variables
  for( size_t i=0; i<name.size(); i++ ) {
    Transducer *nt = NULL;
    Character c=TheAlphabet.symbol2code(name[i]);
    Range *r=svar_value(name[i]); // also frees name[i]

    // insert each character, releasing the copied range node by node
    while (r != NULL) {
      Transducer *t1 = &t->replace_char(c, r->character);

      if (nt == NULL)
        nt = t1;
      else
        nt = disjunction(nt, t1);

      Range *next = r->next;
      delete r;
      r = next;
    }
    // empty range: see the note on the empty case above
    if (nt == NULL)
      nt = new Transducer();
    delete t;
    t = nt;
  }

  return t;
}
|
669
|
-
|
670
|
-
|
671
|
-
/*******************************************************************/
|
672
|
-
/* */
|
673
|
-
/* catenate */
|
674
|
-
/* */
|
675
|
-
/*******************************************************************/
|
676
|
-
|
677
|
-
// Return the result of `*t1 + *t2` and delete both operands.
Transducer *catenate( Transducer *t1, Transducer *t2 )
{
  Transducer &joined = *t1 + *t2;
  delete t1;
  delete t2;
  return &joined;
}
|
685
|
-
|
686
|
-
|
687
|
-
/*******************************************************************/
|
688
|
-
/* */
|
689
|
-
/* add_range */
|
690
|
-
/* */
|
691
|
-
/*******************************************************************/
|
692
|
-
|
693
|
-
// Prepend the range `r` to the list of ranges `l` and return the new head.
Ranges *add_range( Range *r, Ranges *l )
{
  Ranges *head = new Ranges;
  head->next = l;
  head->range = r;
  return head;
}
|
701
|
-
|
702
|
-
|
703
|
-
/*******************************************************************/
|
704
|
-
/* */
|
705
|
-
/* make_mapping */
|
706
|
-
/* */
|
707
|
-
/*******************************************************************/
|
708
|
-
|
709
|
-
// Build a linear transducer from two lists of ranges. At each position every
// character of the list1 range is paired with every character of the list2
// range. When one list is longer, its remaining ranges are paired with
// Label::epsilon on the other side. The last node is made final.
// Both lists are released with free_values().
Transducer *make_mapping( Ranges *list1, Ranges *list2 )
{
  Transducer *t=new Transducer();
  Node *current=t->root_node();
  Ranges *first=list1;
  Ranges *second=list2;

  // positions present in both lists
  for( ; first && second; first=first->next, second=second->next ) {
    Node *target=t->new_node();
    for( Range *a=first->range; a; a=a->next )
      for( Range *b=second->range; b; b=b->next )
        current->add_arc( Label(a->character, b->character), target, t );
    current = target;
  }

  // surplus positions of list1
  for( ; first; first=first->next ) {
    Node *target=t->new_node();
    for( Range *a=first->range; a; a=a->next )
      current->add_arc( Label(a->character, Label::epsilon), target, t );
    current = target;
  }

  // surplus positions of list2
  for( ; second; second=second->next ) {
    Node *target=t->new_node();
    for( Range *b=second->range; b; b=b->next )
      current->add_arc( Label(Label::epsilon, b->character), target, t );
    current = target;
  }

  current->set_final(1);

  free_values(list1);
  free_values(list2);
  return t;
}
|
746
|
-
|
747
|
-
|
748
|
-
/*******************************************************************/
|
749
|
-
/* */
|
750
|
-
/* disjunction */
|
751
|
-
/* */
|
752
|
-
/*******************************************************************/
|
753
|
-
|
754
|
-
// Return the result of `*t1 | *t2` and delete both operands.
Transducer *disjunction( Transducer *t1, Transducer *t2 )
{
  Transducer &either = *t1 | *t2;
  delete t1;
  delete t2;
  return &either;
}
|
762
|
-
|
763
|
-
|
764
|
-
/*******************************************************************/
|
765
|
-
/* */
|
766
|
-
/* conjunction */
|
767
|
-
/* */
|
768
|
-
/*******************************************************************/
|
769
|
-
|
770
|
-
// Return the result of `*t1 & *t2` and delete both operands. Prints a
// warning when agreement variables are still pending in RS or RSS.
Transducer *conjunction( Transducer *t1, Transducer *t2 )
{
  const bool pending = !(RS.size() == 0 && RSS.size() == 0);
  if (pending)
    cerr << "\nWarning: agreement operation inside of conjunction!\n";
  Transducer &both = *t1 & *t2;
  delete t1;
  delete t2;
  return &both;
}
|
780
|
-
|
781
|
-
|
782
|
-
/*******************************************************************/
|
783
|
-
/* */
|
784
|
-
/* subtraction */
|
785
|
-
/* */
|
786
|
-
/*******************************************************************/
|
787
|
-
|
788
|
-
// Return the result of `*t1 / *t2` and delete both operands. Prints a
// warning when agreement variables are still pending in RS or RSS.
Transducer *subtraction( Transducer *t1, Transducer *t2 )
{
  if (RS.size() > 0 || RSS.size() > 0)
    // the message used to say "conjunction" (copy-paste slip)
    cerr << "\nWarning: agreement operation inside of subtraction!\n";
  Transducer *t = &(*t1 / *t2);
  delete t1;
  delete t2;
  return t;
}
|
798
|
-
|
799
|
-
|
800
|
-
/*******************************************************************/
|
801
|
-
/* */
|
802
|
-
/* composition */
|
803
|
-
/* */
|
804
|
-
/*******************************************************************/
|
805
|
-
|
806
|
-
// Return the result of `*t1 || *t2` and delete both operands. Prints a
// warning when agreement variables are still pending in RS or RSS.
Transducer *composition( Transducer *t1, Transducer *t2 )
{
  const bool pending = !(RS.size() == 0 && RSS.size() == 0);
  if (pending)
    cerr << "\nWarning: agreement operation inside of composition!\n";
  Transducer &composed = *t1 || *t2;
  delete t1;
  delete t2;
  return &composed;
}
|
816
|
-
|
817
|
-
/*******************************************************************/
|
818
|
-
/* */
|
819
|
-
/* freely_insert */
|
820
|
-
/* */
|
821
|
-
/*******************************************************************/
|
822
|
-
|
823
|
-
// Return the result of Transducer::freely_insert() on `t` for the label
// (lc, uc).
// NOTE(review): unlike the neighbouring wrappers (upper_level, minimise, ...)
// this function does not delete `t`; confirm whether callers release it.
Transducer *freely_insert( Transducer *t, Character lc, Character uc )
{
  Transducer &inserted = t->freely_insert(Label(lc,uc));
  return &inserted;
}
|
828
|
-
|
829
|
-
|
830
|
-
/*******************************************************************/
|
831
|
-
/* */
|
832
|
-
/* negation */
|
833
|
-
/* */
|
834
|
-
/*******************************************************************/
|
835
|
-
|
836
|
-
// Return the result of `!*t` computed over TheAlphabet and delete `t`.
// Warns about pending agreement variables and reports an error when no
// alphabet has been defined.
Transducer *negation( Transducer *t )
{
  const bool pending = !(RS.size() == 0 && RSS.size() == 0);
  if (pending)
    cerr << "\nWarning: agreement operation inside of negation!\n";
  if (!Alphabet_Defined)
    error("Negation requires the definition of an alphabet");

  // give the operand the global alphabet before negating it
  t->alphabet.clear_char_pairs();
  t->alphabet.copy(TheAlphabet);

  Transducer &negated = !*t;
  delete t;
  return &negated;
}
|
849
|
-
|
850
|
-
|
851
|
-
/*******************************************************************/
|
852
|
-
/* */
|
853
|
-
/* upper_level */
|
854
|
-
/* */
|
855
|
-
/*******************************************************************/
|
856
|
-
|
857
|
-
// Return the result of Transducer::upper_level() on `t` and delete `t`.
Transducer *upper_level( Transducer *t )
{
  Transducer &upper = t->upper_level();
  delete t;
  return &upper;
}
|
864
|
-
|
865
|
-
|
866
|
-
/*******************************************************************/
|
867
|
-
/* */
|
868
|
-
/* lower_level */
|
869
|
-
/* */
|
870
|
-
/*******************************************************************/
|
871
|
-
|
872
|
-
// Return the result of Transducer::lower_level() on `t` and delete `t`.
Transducer *lower_level( Transducer *t )
{
  Transducer &lower = t->lower_level();
  delete t;
  return &lower;
}
|
879
|
-
|
880
|
-
|
881
|
-
/*******************************************************************/
|
882
|
-
/* */
|
883
|
-
/* minimise */
|
884
|
-
/* */
|
885
|
-
/*******************************************************************/
|
886
|
-
|
887
|
-
// Copy TheAlphabet into `t`, return the result of Transducer::minimise()
// (called with the Verbose flag) and delete `t`.
Transducer *minimise( Transducer *t )
{
  t->alphabet.copy(TheAlphabet);
  Transducer &minimal = t->minimise( Verbose );
  delete t;
  return &minimal;
}
|
895
|
-
|
896
|
-
|
897
|
-
/*******************************************************************/
|
898
|
-
/* */
|
899
|
-
/* switch_levels */
|
900
|
-
/* */
|
901
|
-
/*******************************************************************/
|
902
|
-
|
903
|
-
// Return the result of Transducer::switch_levels() on `t` and delete `t`.
Transducer *switch_levels( Transducer *t )
{
  Transducer &switched = t->switch_levels();
  delete t;
  return &switched;
}
|
910
|
-
|
911
|
-
|
912
|
-
/*******************************************************************/
|
913
|
-
/* */
|
914
|
-
/* repetition */
|
915
|
-
/* */
|
916
|
-
/*******************************************************************/
|
917
|
-
|
918
|
-
// Return the result of Transducer::kleene_star() on `t` and delete `t`.
Transducer *repetition( Transducer *t )
{
  Transducer &star = t->kleene_star();
  delete t;
  return &star;
}
|
925
|
-
|
926
|
-
|
927
|
-
/*******************************************************************/
|
928
|
-
/* */
|
929
|
-
/* repetition2 */
|
930
|
-
/* */
|
931
|
-
/*******************************************************************/
|
932
|
-
|
933
|
-
// Return `*t` concatenated with its own Kleene star (t followed by t*).
// Deletes `t`.
Transducer *repetition2( Transducer *t )
{
  Transducer *star = &(t->kleene_star());
  Transducer *one_or_more = &(*t + *star);
  delete t;
  delete star;
  return one_or_more;
}
|
942
|
-
|
943
|
-
|
944
|
-
/*******************************************************************/
|
945
|
-
/* */
|
946
|
-
/* optional */
|
947
|
-
/* */
|
948
|
-
/*******************************************************************/
|
949
|
-
|
950
|
-
// Return a copy of `t` whose root node has been marked final. Deletes `t`.
Transducer *optional( Transducer *t )
{
  Transducer &duplicate = t->copy();
  duplicate.root_node()->set_final(1);
  delete t;
  return &duplicate;
}
|
958
|
-
|
959
|
-
|
960
|
-
/*******************************************************************/
|
961
|
-
/* */
|
962
|
-
/* add_pi_transitions */
|
963
|
-
/* */
|
964
|
-
/*******************************************************************/
|
965
|
-
|
966
|
-
// Add to `node` one self-loop arc for every label stored in `alph`.
static void add_pi_transitions( Transducer *t, Node *node, Alphabet &alph )
{
  Alphabet::const_iterator pos=alph.begin();
  while (pos != alph.end()) {
    node->add_arc( *pos, node, t );
    pos++;
  }
}
|
972
|
-
|
973
|
-
|
974
|
-
/*******************************************************************/
|
975
|
-
/* */
|
976
|
-
/* pi_machine */
|
977
|
-
/* */
|
978
|
-
/*******************************************************************/
|
979
|
-
|
980
|
-
// Create a transducer whose root node is final and carries a self-loop for
// every label of `alph`.
static Transducer *pi_machine( Alphabet &alph )
{
  Transducer *machine=new Transducer();
  Node *root=machine->root_node();
  root->set_final(1);
  add_pi_transitions( machine, root, alph );
  return machine;
}
|
988
|
-
|
989
|
-
|
990
|
-
/*******************************************************************/
|
991
|
-
/* */
|
992
|
-
/* cp */
|
993
|
-
/* */
|
994
|
-
/*******************************************************************/
|
995
|
-
|
996
|
-
// Build the centre-pair transducer for the two ranges by delegating to
// make_transducer().
static Transducer *cp( Range *lower_range, Range *upper_range )
{
  Transducer *pairs = make_transducer(lower_range, upper_range);
  return pairs;
}
|
1001
|
-
|
1002
|
-
|
1003
|
-
/*******************************************************************/
|
1004
|
-
/* */
|
1005
|
-
/* anti_cp */
|
1006
|
-
/* */
|
1007
|
-
/*******************************************************************/
|
1008
|
-
|
1009
|
-
// Build a one-step transducer with an arc for every label of TheAlphabet
// whose lower character is in `lower_range` but for which the cp()
// transducer has no arc from its root. The empty label Label() is handled
// the same way when Label::epsilon is in `lower_range`.
static Transducer *anti_cp( Range *lower_range, Range *upper_range )
{
  Transducer *allowed = cp(lower_range, upper_range);
  Transducer *result=new Transducer();
  Node *final_node=result->new_node();

  final_node->set_final(1);

  Alphabet::const_iterator pos=TheAlphabet.begin();
  while (pos != TheAlphabet.end()) {
    Label l=*pos;
    if (in_range(l.lower_char(), lower_range)) {
      if (!allowed->root_node()->target_node(l))
        result->root_node()->add_arc( l, final_node, result );
    }
    pos++;
  }

  if (in_range(Label::epsilon, lower_range)) {
    if (!allowed->root_node()->target_node(Label()))
      result->root_node()->add_arc( Label(), final_node, result );
  }

  delete allowed;
  return result;
}
|
1031
|
-
|
1032
|
-
|
1033
|
-
/*******************************************************************/
|
1034
|
-
/* */
|
1035
|
-
/* twol_right_rule */
|
1036
|
-
/* */
|
1037
|
-
/*******************************************************************/
|
1038
|
-
|
1039
|
-
// Build the transducer of a two-level rule of type twol_right: the
// complement of  (!lc cp pi) | (pi cp !rc),  where cp is the centre-pair
// transducer and pi the self-loop machine over TheAlphabet.
// `lc` and `rc` are not deleted; their alphabets are overwritten with
// TheAlphabet.
static Transducer *twol_right_rule( Transducer *lc, Range *lower_range,
                                    Range *upper_range, Transducer *rc )
{
  // Build the rule transducer
  Transducer *center = cp(lower_range, upper_range);
  Transducer *any = pi_machine(TheAlphabet);

  // First unwanted language: !lc center any
  lc->alphabet.copy(TheAlphabet);
  Transducer *not_left = &(!*lc);
  Transducer *left_part = &(*not_left + *center);
  delete not_left;
  Transducer *bad_left = &(*left_part + *any);
  delete left_part;

  // Second unwanted language: any center !rc
  rc->alphabet.copy(TheAlphabet);
  Transducer *not_right = &(!*rc);
  Transducer *right_part = &(*center + *not_right);
  delete center;
  delete not_right;
  Transducer *bad_right = &(*any + *right_part);
  delete any;
  delete right_part;

  Transducer *unwanted = &(*bad_left|*bad_right);
  delete bad_left;
  delete bad_right;

  unwanted->alphabet.copy(TheAlphabet);
  Transducer *rule = &(!*unwanted);
  delete unwanted;

  return rule;
}
|
1076
|
-
|
1077
|
-
|
1078
|
-
/*******************************************************************/
|
1079
|
-
/* */
|
1080
|
-
/* twol_left_rule */
|
1081
|
-
/* */
|
1082
|
-
/*******************************************************************/
|
1083
|
-
|
1084
|
-
// Build the transducer of a two-level rule of type twol_left: the
// complement of  lc anti_cp rc.
// Insertion rules (Label::epsilon in `lower_range`) are rejected when either
// context generates the empty string, and otherwise produce a warning.
// `lc` and `rc` are not deleted.
static Transducer *twol_left_rule( Transducer *lc, Range *lower_range,
                                   Range *upper_range, Transducer *rc )
{
  // check for problematic insertion operations like "$L <> <= a $R"
  // where either $L or $R includes the empty string
  const bool insertion = in_range(Label::epsilon, lower_range);
  if (insertion) {
    if (lc->generates_empty_string())
      error("in two level rule: insertion operation with deletable left context!");
    if (rc->generates_empty_string())
      error("in two level rule: insertion operation with deletable right context!");
    cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
  }

  // Build the rule transducer
  Transducer *wrong_center = anti_cp(lower_range, upper_range);

  // Add the left context
  Transducer *with_left = &(*lc + *wrong_center);
  delete wrong_center;

  // Add the right context
  Transducer *unwanted = &(*with_left + *rc);
  delete with_left;

  // Form the complement
  unwanted->alphabet.copy(TheAlphabet);
  Transducer *rule = &(!*unwanted);
  delete unwanted;

  return rule;
}
|
1116
|
-
|
1117
|
-
|
1118
|
-
/*******************************************************************/
|
1119
|
-
/* */
|
1120
|
-
/* make_rule */
|
1121
|
-
/* */
|
1122
|
-
/*******************************************************************/
|
1123
|
-
|
1124
|
-
// Build a two-level rule transducer of the given `type` from the contexts
// `lc` / `rc` (NULL means "any context") and the two ranges.
// Takes ownership of both contexts and both ranges, which may be the same
// object. The result is minimised before it is returned.
Transducer *make_rule( Transducer *lc, Range *lower_range, Twol_Type type,
                       Range *upper_range, Transducer *rc )
{
  const bool pending = !(RS.size() == 0 && RSS.size() == 0);
  if (pending)
    cerr << "\nWarning: agreement operation inside of replacement rule!\n";

  if (!Alphabet_Defined)
    error("Two level rules require the definition of an alphabet");

  // expand the left and the right contexts to their full length
  Transducer *any=pi_machine(TheAlphabet);

  if (lc != NULL) {
    Transducer *extended = &(*any + *lc);
    delete lc;
    lc = extended;
  }
  else
    lc = pi_machine(TheAlphabet);

  if (rc != NULL) {
    Transducer *extended = &(*rc + *any);
    delete rc;
    rc = extended;
  }
  else
    rc = pi_machine(TheAlphabet);

  delete any;

  Transducer *result = NULL;

  if (type == twol_left)
    result = twol_left_rule(lc, lower_range, upper_range, rc);
  else if (type == twol_right)
    result = twol_right_rule(lc, lower_range, upper_range, rc);
  else if (type == twol_both) {
    Transducer *left_rule = twol_left_rule(lc, lower_range, upper_range, rc);
    Transducer *right_rule = twol_right_rule(lc, lower_range, upper_range, rc);
    result = &(*left_rule & *right_rule);
    delete left_rule;
    delete right_rule;
  }

  delete lc;
  delete rc;
  // the two ranges may be the same object; release it only once
  if (lower_range != upper_range)
    free_values(lower_range);
  free_values(upper_range);

  return minimise(result);
}
|
1179
|
-
|
1180
|
-
|
1181
|
-
/*******************************************************************/
|
1182
|
-
/* */
|
1183
|
-
/* make_context */
|
1184
|
-
/* */
|
1185
|
-
/*******************************************************************/
|
1186
|
-
|
1187
|
-
// Create a single-element context list from a left and a right context.
// A NULL context is replaced by the result of empty_transducer().
Contexts *make_context( Transducer *l, Transducer *r )
{
  Transducer *left_side = l;
  if (left_side == NULL)
    left_side = empty_transducer();

  Transducer *right_side = r;
  if (right_side == NULL)
    right_side = empty_transducer();

  Contexts *entry=new Contexts();
  entry->next = NULL;
  entry->left = left_side;
  entry->right = right_side;

  return entry;
}
|
1202
|
-
|
1203
|
-
|
1204
|
-
/*******************************************************************/
|
1205
|
-
/* */
|
1206
|
-
/* add_context */
|
1207
|
-
/* */
|
1208
|
-
/*******************************************************************/
|
1209
|
-
|
1210
|
-
// Link the context element `nc` in front of the list `c`; returns `nc`.
Contexts *add_context( Contexts *nc, Contexts *c )
{
  Contexts *head = nc;
  head->next = c;
  return head;
}
|
1216
|
-
|
1217
|
-
|
1218
|
-
/*******************************************************************/
|
1219
|
-
/* */
|
1220
|
-
/* restriction_transducer */
|
1221
|
-
/* */
|
1222
|
-
/*******************************************************************/
|
1223
|
-
|
1224
|
-
// Compute  !( (l1 / l2) with `marker` replaced by Label::epsilon ),
// with negation taken over TheAlphabet. `l1` and `l2` are not deleted;
// the alphabet of `l1` is overwritten with TheAlphabet.
static Transducer *restriction_transducer( Transducer *l1, Transducer *l2,
                                           Character marker )
{
  l1->alphabet.copy(TheAlphabet);
  Transducer *difference = &(*l1 / *l2);

  Transducer *unmarked = &difference->replace_char(marker, Label::epsilon);
  delete difference;

  unmarked->alphabet.copy(TheAlphabet);
  Transducer *negated = &(!*unmarked);
  delete unmarked;

  return negated;
}
|
1240
|
-
|
1241
|
-
|
1242
|
-
/*******************************************************************/
|
1243
|
-
/* */
|
1244
|
-
/* marker_transducer */
|
1245
|
-
/* */
|
1246
|
-
/*******************************************************************/
|
1247
|
-
|
1248
|
-
// Allocate a new marker symbol in TheAlphabet (returned through `marker`)
// and build a one-label transducer for it. The symbols of `t` and of all
// contexts in `c` are inserted into the alphabet of that transducer.
static Transducer *marker_transducer( Transducer *t, Contexts *c,
                                      Character &marker )
{
  marker = TheAlphabet.new_marker();
  Transducer *mt = one_label_transducer( Label(marker) );

  // build the alphabet with a new marker
  mt->alphabet.insert_symbols(t->alphabet);
  for( Contexts *ctx=c; ctx; ctx=ctx->next ) {
    mt->alphabet.insert_symbols(ctx->left->alphabet);
    mt->alphabet.insert_symbols(ctx->right->alphabet);
  }

  return mt;
}
|
1264
|
-
|
1265
|
-
|
1266
|
-
/*******************************************************************/
|
1267
|
-
/* */
|
1268
|
-
/* center_transducer */
|
1269
|
-
/* */
|
1270
|
-
/*******************************************************************/
|
1271
|
-
|
1272
|
-
// Create the concatenation  pi + mt + t + mt + pi.
// None of the arguments is deleted.
static Transducer *center_transducer( Transducer *t, Transducer *pi,
                                      Transducer *mt )
{
  Transducer *prefix = &(*pi + *mt);           // pi mt
  Transducer *with_center = &(*prefix + *t);   // pi mt t
  delete prefix;
  Transducer *closed = &(*with_center + *mt);  // pi mt t mt
  delete with_center;
  Transducer *full = &(*closed + *pi);         // pi mt t mt pi
  delete closed;
  return full;
}
|
1285
|
-
|
1286
|
-
|
1287
|
-
/*******************************************************************/
|
1288
|
-
/* */
|
1289
|
-
/* context_transducer */
|
1290
|
-
/* */
|
1291
|
-
/*******************************************************************/
|
1292
|
-
|
1293
|
-
// Build the union, over all contexts in `c`, of
//   pi + left[i] + mt + t + mt + right[i] + pi
// None of the arguments is deleted. Returns NULL when `c` is empty.
static Transducer *context_transducer( Transducer *t, Transducer *pi,
                                       Transducer *mt, Contexts *c )
{
  // mt + t + mt is shared by all contexts
  Transducer *t1 = &(*mt + *t);
  Transducer *tmp = &(*t1 + *mt);
  delete t1;
  Transducer *result=NULL;

  while (c) {
    t1 = &(*pi + *c->left);
    Transducer *t2 = &(*t1 + *tmp);
    delete t1;
    t1 = &(*t2 + *c->right);
    delete t2;
    t2 = &(*t1 + *pi);
    delete t1;

    if (result) {
      t1 = &(*result | *t2);
      // the union is a new transducer: release the previous partial
      // result as well, it used to be leaked here
      delete result;
      delete t2;
      result = t1;
    }
    else
      result = t2;

    c = c->next;
  }
  delete tmp;

  return result;
}
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1329
|
-
/*******************************************************************/
|
1330
|
-
/* */
|
1331
|
-
/* result_transducer */
|
1332
|
-
/* */
|
1333
|
-
/*******************************************************************/
|
1334
|
-
|
1335
|
-
// Combine the centre language `l1` and the context language `l2` according
// to the rule type: twol_right restricts l1 by l2, twol_left restricts l2
// by l1, twol_both intersects the two. Any other type yields NULL.
// `l1` and `l2` are not deleted.
static Transducer *result_transducer( Transducer *l1, Transducer *l2,
                                      Twol_Type type, Character marker )
{
  Transducer *result=NULL;

  switch (type) {
  case twol_right:
    result = restriction_transducer( l1, l2, marker );
    break;
  case twol_left:
    result = restriction_transducer( l2, l1, marker );
    break;
  case twol_both:
    {
      Transducer *forward = restriction_transducer( l1, l2, marker );
      Transducer *backward = restriction_transducer( l2, l1, marker );
      result = &(*forward & *backward);
      delete forward;
      delete backward;
    }
    break;
  }

  return result;
}
|
1353
|
-
|
1354
|
-
|
1355
|
-
/*******************************************************************/
|
1356
|
-
/* */
|
1357
|
-
/* restriction */
|
1358
|
-
/* */
|
1359
|
-
/*******************************************************************/
|
1360
|
-
|
1361
|
-
// Build the restriction transducer for centre `t`, rule type `type` and the
// context list `c`. `direction` selects what stands for the centre inside
// the context language: 0 uses pi, 1 uses  lower_level(t) || pi,  any other
// value uses  pi || upper_level(t).
// Takes ownership of `t` and `c`.
Transducer *restriction( Transducer *t, Twol_Type type, Contexts *c,
                         int direction )
{
  Character marker;
  Transducer *mt=marker_transducer( t, c, marker );
  Transducer *any=pi_machine(TheAlphabet);
  Transducer *center_lang=center_transducer( t, any, mt );

  Transducer *inner;
  switch (direction) {
  case 0:
    inner = any;
    break;
  case 1:
    {
      // compute _t || .*
      Transducer *lower = &t->lower_level();
      inner = &(*lower || *any);
      delete lower;
    }
    break;
  default:
    {
      // compute ^t || .*
      Transducer *upper = &t->upper_level();
      inner = &(*any || *upper);
      delete upper;
    }
    break;
  }
  delete t;

  Transducer *context_lang=context_transducer( inner, any, mt, c );
  // `inner` aliases `any` for direction 0; do not delete it twice
  if (inner != any)
    delete inner;
  delete any;
  delete mt;

  Transducer *result=result_transducer( center_lang, context_lang, type, marker );
  delete center_lang;
  delete context_lang;

  free_contexts( c );

  return result;
}
|
1400
|
-
|
1401
|
-
|
1402
|
-
/*******************************************************************/
|
1403
|
-
/* */
|
1404
|
-
/* constrain_boundary_transducer */
|
1405
|
-
/* */
|
1406
|
-
/*******************************************************************/
|
1407
|
-
|
1408
|
-
// Build  !( pi <leftm><rightm> pi ),  the complement (over TheAlphabet) of
// all strings that contain the two markers directly next to each other.
static Transducer *constrain_boundary_transducer( Character leftm,
                                                  Character rightm )
{
  // start from the self-loop machine over TheAlphabet
  Transducer *machine=pi_machine(TheAlphabet);

  // extend it to  pi <leftm><rightm> pi
  Node *start = machine->root_node();
  Node *middle = machine->new_node();
  Node *finish = machine->new_node();

  start->set_final(0);
  finish->set_final(1);

  start->add_arc( Label(leftm), middle, machine);
  middle->add_arc( Label(rightm), finish, machine);

  add_pi_transitions( machine, finish, TheAlphabet );

  // negate it
  machine->alphabet.copy(TheAlphabet);
  Transducer &negated = !*machine;
  delete machine;

  return &negated;
}
|
1435
|
-
|
1436
|
-
|
1437
|
-
/*******************************************************************/
|
1438
|
-
/* */
|
1439
|
-
/* extended_left_transducer */
|
1440
|
-
/* */
|
1441
|
-
/*******************************************************************/
|
1442
|
-
|
1443
|
-
// Build the extended left-context transducer
//   .* (m2 >> (m1 >> t)) || !(.* m1)
// where ">>" is free insertion of a marker. A NULL context yields the
// pi machine over TheAlphabet. Takes ownership of `t`.
static Transducer *extended_left_transducer( Transducer *t,
                                             Character m1, Character m2 )
{
  if (t == NULL) // empty context
    return pi_machine(TheAlphabet);

  // m2 >> (m1 >> t)
  Transducer *with_m1=&t->freely_insert( Label(m1) );
  delete t;
  Transducer *context = &with_m1->freely_insert( Label(m2) );
  delete with_m1;

  // .* (m2 >> (m1 >> t))
  add_pi_transitions( context, context->root_node(), TheAlphabet );

  // !(.* m1)
  Transducer *ends_in_m1 = one_label_transducer(Label(m1));
  add_pi_transitions( ends_in_m1, ends_in_m1->root_node(), TheAlphabet );
  ends_in_m1->alphabet.copy(TheAlphabet);
  Transducer *filter = &(!*ends_in_m1);
  delete ends_in_m1;

  // .* (m2 >> (m1 >> t)) || !(.* m1)
  Transducer *result = &(*context || *filter);
  delete context;
  delete filter;

  return result;
}
|
1474
|
-
|
1475
|
-
|
1476
|
-
/*******************************************************************/
|
1477
|
-
/* */
|
1478
|
-
/* left_context */
|
1479
|
-
/* */
|
1480
|
-
/*******************************************************************/
|
1481
|
-
|
1482
|
-
// Build the left-context constraint
//   !( (!ct mt) | (ct !mt) )
// where ct is the extended left-context transducer of `t` and mt is
// m2* m1 .* . Takes ownership of `t` (through extended_left_transducer).
//
// The former one-shot debugging block, which made four transducer copies
// that were never used or freed, has been removed.
static Transducer *left_context( Transducer *t, Character m1, Character m2 )
{
  // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
  Transducer *ct = extended_left_transducer(t, m1, m2);

  // <R>* <L> .*
  Transducer *mt = one_label_transducer(Label(m1));
  mt->root_node()->add_arc(Label(m2), mt->root_node(), mt );
  add_pi_transitions(mt, mt->root_node()->target_node(Label(m1)),TheAlphabet);

  ct->alphabet.copy(TheAlphabet);
  Transducer *no_ct = &!*ct;

  mt->alphabet.copy(TheAlphabet);
  Transducer *no_mt = &!*mt;

  // !ct mt
  Transducer *t1 = &(*no_ct + *mt);
  delete no_ct;
  delete mt;

  // ct !mt
  Transducer *t2 = &(*ct + *no_mt);
  delete ct;
  delete no_mt;

  Transducer *tmp = &(*t1 | *t2);
  delete t1;
  delete t2;

  tmp->alphabet.copy(TheAlphabet);
  t1 = &!*tmp;
  delete tmp;

  return t1;
}
|
1528
|
-
|
1529
|
-
|
1530
|
-
/*******************************************************************/
|
1531
|
-
/* */
|
1532
|
-
/* make_optional */
|
1533
|
-
/* */
|
1534
|
-
/*******************************************************************/
|
1535
|
-
|
1536
|
-
// Return the union of `t` with the pi machine over TheAlphabet.
// Deletes `t`.
static Transducer *make_optional( Transducer *t )
{
  Transducer *any = pi_machine(TheAlphabet);
  Transducer &either = *t | *any;
  delete t;
  delete any;
  return &either;
}
|
1545
|
-
|
1546
|
-
|
1547
|
-
/*******************************************************************/
|
1548
|
-
/* */
|
1549
|
-
/* replace */
|
1550
|
-
/* */
|
1551
|
-
/*******************************************************************/
|
1552
|
-
|
1553
|
-
// Build the unconditional replacement transducer  (no_ct ct)* no_ct  for
// the centre `ct`, where no_ct = !(.* side .*) and `side` is the lower level
// of ct for repl_up or the upper level for repl_down. Any other type is
// reported through error(). When `optional` is set, the result is passed
// through make_optional(). Takes ownership of `ct`.
Transducer *replace( Transducer *ct, Repl_Type type, bool optional )
{
  // compute the no-center transducer
  Transducer *side;

  if (type == repl_up)
    // _ct
    side = &ct->lower_level();
  else if (type == repl_down)
    // ^ct
    side = &ct->upper_level();
  else
    error("Invalid type of replace operator");

  // .* _ct
  add_pi_transitions( side, side->root_node(), TheAlphabet );

  // .* _ct .*
  Transducer *any = pi_machine(TheAlphabet);
  Transducer *contains_center = &(*side + *any);
  delete side;
  delete any;

  // no_ct = !(.* _ct .*)
  contains_center->alphabet.copy(TheAlphabet);
  Transducer *no_ct = &(!*contains_center);
  delete contains_center;

  // compute the unconditional replacement transducer

  // no-ct ct
  Transducer *step = &(*no_ct + *ct);
  delete ct;

  // (no-ct ct)*
  Transducer *steps = &(step->kleene_star());
  delete step;

  // (no-ct ct)* no-ct
  Transducer *result = &(*steps + *no_ct);
  delete steps;
  delete no_ct;

  if (optional)
    result = make_optional(result);

  return result;
}
|
1602
|
-
|
1603
|
-
|
1604
|
-
/*******************************************************************/
|
1605
|
-
/* */
|
1606
|
-
/* replace_transducer */
|
1607
|
-
/* */
|
1608
|
-
/*******************************************************************/
|
1609
|
-
|
1610
|
-
/* Wraps the center transducer in boundary markers and hands the result to
   replace() (non-optional).
     ct   : center transducer; deleted inside this function
     lm   : left boundary marker
     rm   : right boundary marker
     type : forwarded unchanged to replace() */
static Transducer *replace_transducer( Transducer *ct, Character lm,
                                       Character rm, Repl_Type type )
{
  // allow the markers to occur freely inside the center:
  // first the left marker, then the right marker
  Transducer *with_left = &ct->freely_insert(Label(lm));
  delete ct;
  Transducer *with_both = &with_left->freely_insert(Label(rm));
  delete with_left;

  // put the left marker in front:  <L> (... $Center$ ...)
  Transducer *left_marker = one_label_transducer( Label(lm) );
  Transducer *prefixed = &(*left_marker + *with_both);
  delete left_marker;
  delete with_both;

  // append the right marker:  $CenterB$ = <L> (... $Center$ ...) <R>
  Transducer *right_marker = one_label_transducer( Label(rm) );
  Transducer *bracketed = &(*prefixed + *right_marker);
  delete prefixed;
  delete right_marker;

  return replace(bracketed, type, false);
}
|
1637
|
-
|
1638
|
-
|
1639
|
-
/*******************************************************************/
|
1640
|
-
/* */
|
1641
|
-
/* replace_in_context */
|
1642
|
-
/* */
|
1643
|
-
/*******************************************************************/
|
1644
|
-
|
1645
|
-
/* Builds the transducer for a replace operator with context conditions.
     t        : center transducer; handed to replace_transducer()
     type     : repl_up, repl_down, repl_left or repl_right. It selects
                whether each context transducer is composed before or
                after the replacement step (see below).
     c        : left and right context expressions; both must be automata.
                c->right is deleted here and c is released with
                free_contexts() before returning.
     optional : if true, the result is passed through make_optional()
   An alphabet must have been defined. Two marker symbols are created in
   TheAlphabet for the duration of the construction and removed again with
   delete_markers() before the function returns. */
Transducer *replace_in_context( Transducer *t, Repl_Type type, Contexts *c,
                                bool optional )
{
  // The implementation of the replace operators is based on
  // "The Replace Operator" by Lauri Karttunen

  if (!Alphabet_Defined)
    error("The replace operators require the definition of an alphabet");

  if (!c->left->is_automaton() || !c->right->is_automaton())
    error("The replace operators require automata as context expressions!");

  // create the marker symbols
  Character leftm = TheAlphabet.new_marker();
  Character rightm = TheAlphabet.new_marker();

  /////////////////////////////////////////////////////////////
  // Create the insert boundaries transducer (.|<>:<L>|<>:<R>)*
  /////////////////////////////////////////////////////////////

  Transducer *ibt=pi_machine(TheAlphabet);
  Node *root=ibt->root_node();
  root->add_arc( Label(Label::epsilon, leftm), root, ibt);
  root->add_arc( Label(Label::epsilon, rightm),root, ibt);

  /////////////////////////////////////////////////////////////
  // Create the remove boundaries transducer (.|<L>:<>|<R>:<>)*
  /////////////////////////////////////////////////////////////

  Transducer *rbt=pi_machine(TheAlphabet);
  root = rbt->root_node();
  root->add_arc( Label(leftm, Label::epsilon), root, rbt);
  root->add_arc( Label(rightm,Label::epsilon), root, rbt);

  // Add the markers to the alphabet.
  // This happens only after ibt and rbt were built from TheAlphabet.
  TheAlphabet.insert(Label(leftm));
  TheAlphabet.insert(Label(rightm));

  /////////////////////////////////////////////////////////////
  // Create the constrain boundaries transducer !(.*<L><R>.*)
  /////////////////////////////////////////////////////////////

  Transducer *cbt=constrain_boundary_transducer(leftm, rightm);

  /////////////////////////////////////////////////////////////
  // Create the extended context transducers
  /////////////////////////////////////////////////////////////

  // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
  Transducer *lct = left_context(c->left, leftm, rightm);

  // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
  // It is obtained by reversing the right context, building a left
  // context with swapped markers, and reversing the result again.
  // NOTE(review): rev_right is not deleted here; presumably left_context()
  // takes ownership of its argument - confirm against its definition.
  Transducer *rev_right = &c->right->reverse();
  delete c->right;
  Transducer *t2 = left_context(rev_right, rightm, leftm);
  Transducer *rct = &t2->reverse();
  delete t2;

  /////////////////////////////////////////////////////////////
  // unconditional replace transducer
  /////////////////////////////////////////////////////////////

  Transducer *rt;
  if (type == repl_up || type == repl_right || type == repl_left)
    rt = replace_transducer( t, leftm, rightm, repl_up );
  else
    rt = replace_transducer( t, leftm, rightm, repl_down );

  /////////////////////////////////////////////////////////////
  // build the conditional replacement transducer
  /////////////////////////////////////////////////////////////

  // The former throw-away copies of ibt, cbt, lct, rct, rt and rbt were
  // never used or freed (a memory leak) and have been removed.

  Transducer *tmp = &(*ibt || *cbt);
  delete(ibt);
  delete(cbt);

  // contexts that are checked before the replacement step
  if (type == repl_up || type == repl_left) {
    t2 = &(*tmp || *lct);
    delete tmp;
    delete lct;
    tmp = t2;
  }
  if (type == repl_up || type == repl_right) {
    t2 = &(*tmp || *rct);
    delete tmp;
    delete rct;
    tmp = t2;
  }

  // the replacement step itself
  t2 = &(*tmp || *rt);
  delete tmp;
  delete rt;
  tmp = t2;

  // contexts that are checked after the replacement step;
  // each of lct and rct is used and deleted exactly once for every type
  if (type == repl_down || type == repl_right) {
    t2 = &(*tmp || *lct);
    delete tmp;
    delete lct;
    tmp = t2;
  }
  if (type == repl_down || type == repl_left) {
    t2 = &(*tmp || *rct);
    delete tmp;
    delete rct;
    tmp = t2;
  }

  // strip the boundary markers again
  t2 = &(*tmp || *rbt);
  delete tmp;
  delete rbt;

  // Remove the markers from the alphabet
  TheAlphabet.delete_markers();

  if (optional)
    t2 = make_optional(t2);

  free_contexts( c );

  return t2;
}
|
1774
|
-
|
1775
|
-
|
1776
|
-
/*******************************************************************/
|
1777
|
-
/* */
|
1778
|
-
/* add_alphabet */
|
1779
|
-
/* */
|
1780
|
-
/*******************************************************************/
|
1781
|
-
|
1782
|
-
/* Replaces the alphabet of t with a copy of TheAlphabet and then calls
   complete_alphabet() on t. */
void add_alphabet( Transducer *t )

{
  Transducer &target = *t;

  target.alphabet.copy(TheAlphabet);
  target.complete_alphabet();
}
|
1788
|
-
|
1789
|
-
|
1790
|
-
/*******************************************************************/
|
1791
|
-
/* */
|
1792
|
-
/* write_to_file */
|
1793
|
-
/* */
|
1794
|
-
/*******************************************************************/
|
1795
|
-
|
1796
|
-
/* Stores the transducer t in the file named filename.
   The file is opened in binary write mode; if that fails, an error message
   is printed to stderr and the program exits with status 1.
   filename is released with free() once the file is open, so the caller
   must not use it afterwards.
   Before storing, t is run through explode(), add_alphabet() and
   minimise(). */
void write_to_file( Transducer *t, char *filename)

{
  FILE *out = fopen(filename,"wb");

  if (out == NULL) {
    fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
    exit(1);
  }
  // the name is only needed for opening the file and the error message
  free( filename );

  Transducer *exploded = explode(t);
  add_alphabet(exploded);

  Transducer *minimal = minimise(exploded);
  minimal->store(out);

  fclose(out);
}
|
1812
|
-
|
1813
|
-
|
1814
|
-
/*******************************************************************/
|
1815
|
-
/* */
|
1816
|
-
/* result */
|
1817
|
-
/* */
|
1818
|
-
/*******************************************************************/
|
1819
|
-
|
1820
|
-
/* Produces the final transducer and discards all variable values.
     t           : transducer to finalise
     switch_flag : if true, switch_levels() is applied before the alphabet
                   is added
   t is passed through explode(), optionally switch_levels(), then
   add_alphabet() and minimise(); the minimised transducer is returned.
   Every entry of the variable map VM is deleted, the map is cleared and
   the variable names are freed. */
Transducer *result( Transducer *t, bool switch_flag )

{
  t = explode(t);

  // delete the variable values, remembering the names so that they can
  // be freed once the map no longer refers to them
  vector<char*> names;
  VarMap::iterator var = VM.begin();
  while (var != VM.end()) {
    names.push_back(var->first);
    delete var->second;
    var->second = NULL;
    ++var;
  }
  VM.clear();

  for( vector<char*>::iterator name = names.begin(); name != names.end(); ++name )
    free(*name);
  names.clear();

  if (switch_flag)
    t = switch_levels(t);

  add_alphabet(t);
  return minimise(t);
}
|