ruby-sfst 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -0
- data/COPYING +280 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +54 -0
- data/README.md +1 -1
- data/Rakefile +9 -18
- data/bin/console +7 -0
- data/bin/setup +6 -0
- data/ext/sfst/alphabet.cc +879 -0
- data/ext/sfst/alphabet.h +302 -0
- data/ext/sfst/basic.cc +85 -0
- data/ext/{sfst_machine → sfst}/basic.h +7 -4
- data/ext/sfst/compact.cc +629 -0
- data/ext/sfst/compact.h +100 -0
- data/ext/sfst/determinise.cc +279 -0
- data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
- data/ext/sfst/fst.cc +1150 -0
- data/ext/sfst/fst.h +374 -0
- data/ext/sfst/hopcroft.cc +681 -0
- data/ext/sfst/interface.cc +1921 -0
- data/ext/sfst/interface.h +171 -0
- data/ext/sfst/make-compact.cc +323 -0
- data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
- data/ext/sfst/mem.h +80 -0
- data/ext/sfst/operators.cc +1273 -0
- data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
- data/ext/sfst/sgi.h +72 -0
- data/ext/sfst/utf8.cc +149 -0
- data/ext/{sfst_machine → sfst}/utf8.h +7 -4
- data/lib/sfst.rb +2 -1
- data/lib/sfst/version.rb +1 -1
- data/ruby-sfst.gemspec +23 -23
- metadata +107 -35
- data/ext/sfst_machine/alphabet.cc +0 -812
- data/ext/sfst_machine/alphabet.h +0 -273
- data/ext/sfst_machine/basic.cc +0 -84
- data/ext/sfst_machine/compact.cc +0 -616
- data/ext/sfst_machine/compact.h +0 -98
- data/ext/sfst_machine/determinise.cc +0 -303
- data/ext/sfst_machine/fst.cc +0 -1000
- data/ext/sfst_machine/fst.h +0 -369
- data/ext/sfst_machine/interface.cc +0 -1842
- data/ext/sfst_machine/interface.h +0 -93
- data/ext/sfst_machine/make-compact.cc +0 -327
- data/ext/sfst_machine/mem.h +0 -74
- data/ext/sfst_machine/operators.cc +0 -1131
- data/ext/sfst_machine/sgi.h +0 -44
- data/ext/sfst_machine/utf8.cc +0 -146
- data/test/test_sfst.fst +0 -3
- data/test/test_sfst.rb +0 -114
@@ -0,0 +1,1921 @@
|
|
1
|
+
/*******************************************************************/
|
2
|
+
/* */
|
3
|
+
/* FILE interface.C */
|
4
|
+
/* MODULE interface */
|
5
|
+
/* PROGRAM SFST */
|
6
|
+
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
7
|
+
/* */
|
8
|
+
/*******************************************************************/
|
9
|
+
|
10
|
+
#include <fstream>
|
11
|
+
#include <set>
|
12
|
+
|
13
|
+
#include "interface.h"
|
14
|
+
|
15
|
+
using std::ifstream;
|
16
|
+
using std::vector;
|
17
|
+
|
18
|
+
namespace SFST {
|
19
|
+
|
20
|
+
/*******************************************************************/
|
21
|
+
/* */
|
22
|
+
/* error */
|
23
|
+
/* */
|
24
|
+
/*******************************************************************/
|
25
|
+
|
26
|
+
void error( const char *message )
|
27
|
+
|
28
|
+
{
|
29
|
+
cerr << "\nError: " << message << "\naborted.\n";
|
30
|
+
exit(1);
|
31
|
+
}
|
32
|
+
|
33
|
+
|
34
|
+
/*******************************************************************/
|
35
|
+
/* */
|
36
|
+
/* error2 */
|
37
|
+
/* */
|
38
|
+
/*******************************************************************/
|
39
|
+
|
40
|
+
void error2( const char *message, char *input )
|
41
|
+
|
42
|
+
{
|
43
|
+
cerr << "\nError: " << message << ": " << input << "\naborted.\n";
|
44
|
+
exit(1);
|
45
|
+
}
|
46
|
+
|
47
|
+
|
48
|
+
/*******************************************************************/
|
49
|
+
/* */
|
50
|
+
/* Interface::symbol_code */
|
51
|
+
/* */
|
52
|
+
/*******************************************************************/
|
53
|
+
|
54
|
+
// Returns the code of the given symbol in TheAlphabet.  If
// symbol2code reports EOF (symbol unknown), the symbol is added to
// the alphabet first.
// The string is released with free() before returning, so the caller
// must pass a malloc-allocated buffer and must not use it afterwards.
Character Interface::symbol_code( char *symbol )
{
  const int known = TheAlphabet.symbol2code(symbol);
  const int code = (known == EOF) ? (int)TheAlphabet.add_symbol( symbol ) : known;
  free(symbol);
  return (Character)code;
}
|
63
|
+
|
64
|
+
|
65
|
+
/*******************************************************************/
|
66
|
+
/* */
|
67
|
+
/* Interface::character_code */
|
68
|
+
/* */
|
69
|
+
/*******************************************************************/
|
70
|
+
|
71
|
+
// Returns the alphabet code for the character value uc.
// In UTF-8 mode the value is converted with int2utf8; otherwise it is
// truncated to a single byte.  In both cases a heap-allocated string
// is handed to symbol_code, which frees it.
Character Interface::character_code( unsigned int uc )
{
  if (!TheAlphabet.utf8) {
    // single-byte encoding: build a one-character C string
    unsigned char *one_char=(unsigned char*)malloc(2);
    one_char[0] = (unsigned char)uc;
    one_char[1] = 0;
    return symbol_code((char*)one_char);
  }

  return symbol_code(fst_strdup(int2utf8(uc)));
}
|
83
|
+
|
84
|
+
|
85
|
+
/*******************************************************************/
|
86
|
+
/* */
|
87
|
+
/* Interface::add_value */
|
88
|
+
/* */
|
89
|
+
/*******************************************************************/
|
90
|
+
|
91
|
+
// Prepends the character c to the range list r and returns the new
// head node.  The node is allocated with new.
Range *Interface::add_value( Character c, Range *r )
{
  Range *head = new Range;
  head->next = r;
  head->character = c;
  return head;
}
|
99
|
+
|
100
|
+
|
101
|
+
/*******************************************************************/
|
102
|
+
/* */
|
103
|
+
/* Interface::add_values */
|
104
|
+
/* */
|
105
|
+
/*******************************************************************/
|
106
|
+
|
107
|
+
// Prepends the codes of all character values in [c1, c2] to the range
// list r and returns the new head.  Values are inserted from c2 down
// to c1, so the resulting list starts with the code of c1.
// If c1 > c2 the list is returned unchanged.
Range *Interface::add_values( unsigned int c1, unsigned int c2, Range *r )
{
  if (c1 > c2)
    return r;

  // The counter is unsigned, so a "c >= c1" loop condition can never
  // become false when c1 == 0 (the decrement wraps around).  Test for
  // the lower bound explicitly before decrementing instead.
  unsigned int c = c2;
  for (;;) {
    r = add_value(character_code(c), r);
    if (c == c1)
      break;
    c--;
  }
  return r;
}
|
114
|
+
|
115
|
+
|
116
|
+
/*******************************************************************/
|
117
|
+
/* */
|
118
|
+
/* Interface::append_values */
|
119
|
+
/* */
|
120
|
+
/*******************************************************************/
|
121
|
+
|
122
|
+
// Returns a list consisting of fresh copies of the nodes of r2
// followed by the (shared, uncopied) list r.  r2 itself is neither
// modified nor freed.  If r2 is NULL, r is returned as is.
Range *Interface::append_values( Range *r2, Range *r )
{
  Range *head = r;
  Range **link = &head;   // slot which receives the next copied node

  for( Range *p=r2; p != NULL; p=p->next ) {
    // each copy initially points at r; the link is overwritten if
    // another node follows
    Range *copy = add_value(p->character, r);
    *link = copy;
    link = &copy->next;
  }
  return head;
}
|
129
|
+
|
130
|
+
|
131
|
+
/*******************************************************************/
|
132
|
+
/* */
|
133
|
+
/* Interface::add_var_values */
|
134
|
+
/* */
|
135
|
+
/*******************************************************************/
|
136
|
+
|
137
|
+
// Prepends the values of the range variable "name" to the list r and
// returns the new head.  The name string is consumed (svar_value
// frees it).
Range *Interface::add_var_values( char *name, Range *r )
{
  // svar_value returns a freshly allocated copy of the variable's
  // value, and append_values copies those nodes once more.  Release
  // the intermediate copy so that it is not leaked.
  Range *values = svar_value(name);
  Range *result = append_values(values, r);
  free_values(values);
  return result;
}
|
142
|
+
|
143
|
+
|
144
|
+
/*******************************************************************/
|
145
|
+
/* */
|
146
|
+
/* Interface::in_range */
|
147
|
+
/* */
|
148
|
+
/*******************************************************************/
|
149
|
+
|
150
|
+
bool Interface::in_range( unsigned int c, Range *r )
|
151
|
+
|
152
|
+
{
|
153
|
+
while (r) {
|
154
|
+
if (r->character == c)
|
155
|
+
return true;
|
156
|
+
r = r->next;
|
157
|
+
}
|
158
|
+
return false;
|
159
|
+
}
|
160
|
+
|
161
|
+
|
162
|
+
/*******************************************************************/
|
163
|
+
/* */
|
164
|
+
/* free_values */
|
165
|
+
/* */
|
166
|
+
/*******************************************************************/
|
167
|
+
|
168
|
+
// Deletes every node of the Range list starting at r.
// A NULL argument is accepted and does nothing.
static void free_values( Range *r )
{
  while (r != NULL) {
    Range *rest = r->next;
    delete r;
    r = rest;
  }
}
|
176
|
+
|
177
|
+
|
178
|
+
/*******************************************************************/
|
179
|
+
/* */
|
180
|
+
/* free_values */
|
181
|
+
/* */
|
182
|
+
/*******************************************************************/
|
183
|
+
|
184
|
+
// Deletes every node of the Ranges list starting at r.
// Only the list nodes themselves are deleted here; the Range lists
// they point to are not touched.  A NULL argument does nothing.
static void free_values( Ranges *r )
{
  while (r != NULL) {
    Ranges *rest = r->next;
    delete r;
    r = rest;
  }
}
|
192
|
+
|
193
|
+
|
194
|
+
/*******************************************************************/
|
195
|
+
/* */
|
196
|
+
/* free_contexts */
|
197
|
+
/* */
|
198
|
+
/*******************************************************************/
|
199
|
+
|
200
|
+
// Deletes every node of the Contexts list starting at c.
// A NULL argument is accepted and does nothing.
static void free_contexts( Contexts *c )
{
  while (c != NULL) {
    Contexts *rest = c->next;
    delete c;
    c = rest;
  }
}
|
208
|
+
|
209
|
+
|
210
|
+
/*******************************************************************/
|
211
|
+
/* */
|
212
|
+
/* Interface::copy_values */
|
213
|
+
/* */
|
214
|
+
/*******************************************************************/
|
215
|
+
|
216
|
+
// Returns a newly allocated copy of the range list r which holds the
// same characters in the same order.  Returns NULL for an empty list.
Range *Interface::copy_values( const Range *r )
{
  Range *head = NULL;
  Range **link = &head;   // slot which receives the next copied node

  for( const Range *p=r; p != NULL; p=p->next ) {
    Range *copy = add_value( p->character, NULL );
    *link = copy;
    link = &copy->next;
  }
  return head;
}
|
223
|
+
|
224
|
+
|
225
|
+
/*******************************************************************/
|
226
|
+
/* */
|
227
|
+
/* Interface::complement_range */
|
228
|
+
/* */
|
229
|
+
/*******************************************************************/
|
230
|
+
|
231
|
+
// Replaces the range list r by its complement as computed by
// TheAlphabet.complement.  The argument list is freed.  Aborts via
// error() if the complement is empty.
// The result list holds the complemented symbols in reverse vector
// order because each symbol is prepended.
Range *Interface::complement_range( Range *r )
{
  // collect the characters of the range, then release the list
  vector<Character> sym;
  Range *p = r;
  while (p != NULL) {
    sym.push_back( p->character );
    p = p->next;
  }
  free_values( r );

  TheAlphabet.complement(sym);
  if (sym.size() == 0)
    error("Empty character range!");

  // build the result list
  Range *result=NULL;
  for( size_t k=0; k<sym.size(); k++ )
    result = add_value( sym[k], result );

  return result;
}
|
253
|
+
|
254
|
+
|
255
|
+
/*******************************************************************/
|
256
|
+
/* */
|
257
|
+
/* Interface::make_transducer */
|
258
|
+
/* */
|
259
|
+
/*******************************************************************/
|
260
|
+
|
261
|
+
// Builds a transducer with a single final target node and one arc
// from the root node for each label described by the two ranges.
//  - If one of the ranges is NULL (wildcard '.'), an arc is added for
//    every alphabet label whose lower/upper character lies in the
//    respective non-NULL range.  This requires a defined alphabet.
//  - Otherwise the two lists are traversed in parallel; the shorter
//    list keeps supplying its last element until the longer one ends.
// The range lists are not freed here.
Transducer *Interface::make_transducer( Range *r1, Range *r2 )
{
  Transducer *t=new Transducer();
  Node *target=t->new_node();
  target->set_final(1);

  const bool wildcard = (r1 == NULL || r2 == NULL);

  if (!wildcard) {
    Range *lower = r1;
    Range *upper = r2;
    bool more = true;
    while (more) {
      // TheAlphabet.insert(l);
      t->root_node()->add_arc( Label(lower->character, upper->character),
                               target, t );
      more = (lower->next != NULL || upper->next != NULL);
      if (lower->next)
        lower = lower->next;
      if (upper->next)
        upper = upper->next;
    }
    return t;
  }

  if (!Alphabet_Defined)
    error("The wildcard symbol '.' requires the definition of an alphabet");

  // one of the ranges was '.'
  for(Alphabet::const_iterator it=TheAlphabet.begin();
      it!=TheAlphabet.end(); it++)
  {
    if (r1 != NULL && !in_range(it->lower_char(), r1))
      continue;
    if (r2 != NULL && !in_range(it->upper_char(), r2))
      continue;
    t->root_node()->add_arc( *it, target, t );
  }

  return t;
}
|
295
|
+
|
296
|
+
|
297
|
+
/*******************************************************************/
|
298
|
+
/* */
|
299
|
+
/* Interface::one_label_transducer */
|
300
|
+
/* */
|
301
|
+
/*******************************************************************/
|
302
|
+
|
303
|
+
// Creates a transducer with exactly one arc labelled l which leads
// from the root node to a new final node.
Transducer *Interface::one_label_transducer( Label l )
{
  Transducer *result = new Transducer();
  Node *final_node = result->new_node();
  final_node->set_final(1);
  result->root_node()->add_arc( l, final_node, result );

  return result;
}
|
313
|
+
|
314
|
+
|
315
|
+
/*******************************************************************/
|
316
|
+
/* */
|
317
|
+
/* Interface::new_transducer */
|
318
|
+
/* */
|
319
|
+
/*******************************************************************/
|
320
|
+
|
321
|
+
// Like make_transducer, but takes ownership of the two range lists
// and frees them afterwards.  If both arguments are the same list,
// it is freed only once.
Transducer *Interface::new_transducer( Range *r1, Range *r2 )
{
  Transducer *result = make_transducer( r1, r2 );

  const bool same_list = (r1 == r2);
  if (!same_list)
    free_values(r1);
  free_values(r2);

  return result;
}
|
330
|
+
|
331
|
+
|
332
|
+
/*******************************************************************/
|
333
|
+
/* */
|
334
|
+
/* Interface::read_words */
|
335
|
+
/* */
|
336
|
+
/*******************************************************************/
|
337
|
+
|
338
|
+
// Reads a word list from the given file and returns a transducer
// built from it (via the stream constructor of Transducer).  The
// symbols of the new transducer are inserted into TheAlphabet.
//   filename: path of the word list; freed with free() once the file
//             has been opened successfully
// Throws a char* pointing to a static message buffer if the file
// cannot be opened.
// NOTE(review): filename is not freed on the error path -- confirm
// whether the caller releases it after catching the exception.
Transducer *Interface::read_words( char *filename )
{
  if (Verbose)
    fprintf(stderr,"\nreading words from %s...", filename);
  ifstream is(filename);
  if (!is.is_open()) {
    static char message[1000];
    // bounded formatting: a long file name must not overflow the
    // fixed-size buffer (the message is truncated instead)
    snprintf(message, sizeof(message),
             "Error: Cannot open file \"%s\"!", filename);
    throw message;
  }
  free( filename );
  Transducer *t = new Transducer(is, &TheAlphabet, Verbose, LexiconComments);
  is.close();
  TheAlphabet.insert_symbols(t->alphabet);
  if (Verbose)
    fprintf(stderr,"finished\n");
  return t;
}
|
357
|
+
|
358
|
+
|
359
|
+
/*******************************************************************/
|
360
|
+
/* */
|
361
|
+
/* Interface::read_transducer */
|
362
|
+
/* */
|
363
|
+
/*******************************************************************/
|
364
|
+
|
365
|
+
// Reads a transducer from the given binary file and returns a copy
// of it which uses the symbol encoding of TheAlphabet.  The symbols
// of the copy are inserted into TheAlphabet.
//   filename: path of the transducer file; freed with free() after
//             the transducer has been read successfully
// Throws a char* pointing to a static message buffer if the file
// cannot be opened or if its utf8 flag differs from TheAlphabet's.
// NOTE(review): filename is not freed on the error paths -- confirm
// whether the caller releases it after catching the exception.
Transducer *Interface::read_transducer( char *filename )
{
  if (Verbose)
    fprintf(stderr,"\nreading transducer from %s...", filename);
  FILE *file = fopen(filename,"rb");
  if (file == NULL) {
    static char message[1000];
    // bounded formatting: a long file name must not overflow the
    // fixed-size buffer (the message is truncated instead)
    snprintf(message, sizeof(message),
             "Error: Cannot open file \"%s\"!", filename);
    throw message;
  }
  Transducer t(file);
  fclose(file);
  if (t.alphabet.utf8 != TheAlphabet.utf8) {
    static char message[1000];
    snprintf(message, sizeof(message),
             "Error: incompatible character encoding in file \"%s\"!",
             filename);
    throw message;
  }
  free( filename );
  // transfer the encoding of TheAlphabet to the transducer
  Transducer *nt = &t.copy(false, &TheAlphabet);
  TheAlphabet.insert_symbols(nt->alphabet);
  if (Verbose)
    fprintf(stderr,"finished\n");
  return nt;
}
|
392
|
+
|
393
|
+
|
394
|
+
/*******************************************************************/
|
395
|
+
/* */
|
396
|
+
/* Interface::def_alphabet */
|
397
|
+
/* */
|
398
|
+
/*******************************************************************/
|
399
|
+
|
400
|
+
// Defines TheAlphabet from the transducer t.
// The transducer is exploded and minimised, its character pairs are
// recomputed with complete_alphabet(), and the resulting alphabet
// replaces the character pairs of TheAlphabet.  Sets
// Alphabet_Defined and deletes the (processed) transducer.
void Interface::def_alphabet( Transducer *t )
{
  Transducer *processed = minimise(explode(t));

  processed->alphabet.clear_char_pairs();
  processed->complete_alphabet();

  TheAlphabet.clear_char_pairs();
  TheAlphabet.copy(processed->alphabet);
  Alphabet_Defined = 1;

  delete processed;
}
|
412
|
+
|
413
|
+
|
414
|
+
/*******************************************************************/
|
415
|
+
/* */
|
416
|
+
/* Interface::def_svar definition of a value range variable */
|
417
|
+
/* */
|
418
|
+
/*******************************************************************/
|
419
|
+
|
420
|
+
bool Interface::def_svar( char *name, Range *r )
|
421
|
+
|
422
|
+
{
|
423
|
+
// delete the old value of the variable
|
424
|
+
SVarMap::iterator it=SVM.find(name);
|
425
|
+
if (it != SVM.end()) {
|
426
|
+
char *n=it->first;
|
427
|
+
Range *v=it->second;
|
428
|
+
SVM.erase(it);
|
429
|
+
delete v;
|
430
|
+
free(n);
|
431
|
+
}
|
432
|
+
SVM[name] = r;
|
433
|
+
return r == NULL;
|
434
|
+
}
|
435
|
+
|
436
|
+
|
437
|
+
/*******************************************************************/
|
438
|
+
/* */
|
439
|
+
/* Interface::svar_value */
|
440
|
+
/* */
|
441
|
+
/*******************************************************************/
|
442
|
+
|
443
|
+
// Returns a newly allocated copy of the value of the range variable
// "name".  Aborts via error2() if the variable is undefined.
// The name string is freed with free().
Range *Interface::svar_value( char *name )
{
  SVarMap::iterator entry = SVM.find(name);
  if (entry == SVM.end()) {
    error2("undefined variable", name);
  }
  free(name);

  Range *value_copy = copy_values(entry->second);
  return value_copy;
}
|
452
|
+
|
453
|
+
|
454
|
+
/*******************************************************************/
|
455
|
+
/* */
|
456
|
+
/* Interface::rsvar_value */
|
457
|
+
/* */
|
458
|
+
/*******************************************************************/
|
459
|
+
|
460
|
+
// Registers "name" in the set RSS (storing a duplicate of the string
// if it is not present yet) and returns a one-element range holding
// the symbol code of the name.
// The name string is consumed: symbol_code frees it.
Range *Interface::rsvar_value( char *name )
{
  const bool already_registered = (RSS.find(name) != RSS.end());
  if (!already_registered)
    RSS.insert(fst_strdup(name));

  const Character code = symbol_code(name);
  return add_value(code, NULL);
}
|
467
|
+
|
468
|
+
|
469
|
+
/*******************************************************************/
|
470
|
+
/* */
|
471
|
+
/* Interface::def_var definition of a transducer variable */
|
472
|
+
/* */
|
473
|
+
/*******************************************************************/
|
474
|
+
|
475
|
+
bool Interface::def_var( char *name, Transducer *t )
|
476
|
+
|
477
|
+
{
|
478
|
+
// delete the old value of the variable
|
479
|
+
VarMap::iterator it=VM.find(name);
|
480
|
+
if (it != VM.end()) {
|
481
|
+
char *n=it->first;
|
482
|
+
Transducer *v=it->second;
|
483
|
+
VM.erase(it);
|
484
|
+
delete v;
|
485
|
+
free(n);
|
486
|
+
}
|
487
|
+
|
488
|
+
t = explode(t);
|
489
|
+
t = minimise(t);
|
490
|
+
|
491
|
+
VM[name] = t;
|
492
|
+
return t->is_empty();
|
493
|
+
}
|
494
|
+
|
495
|
+
|
496
|
+
/*******************************************************************/
|
497
|
+
/* */
|
498
|
+
/* Interface::def_rvar */
|
499
|
+
/* definition of an agreement variable for automata */
|
500
|
+
/* */
|
501
|
+
/*******************************************************************/
|
502
|
+
|
503
|
+
bool Interface::def_rvar( char *name, Transducer *t )
|
504
|
+
|
505
|
+
{
|
506
|
+
if (t->is_cyclic())
|
507
|
+
error2("cyclic transducer assigned to", name);
|
508
|
+
return def_var( name, t );
|
509
|
+
}
|
510
|
+
|
511
|
+
|
512
|
+
/*******************************************************************/
|
513
|
+
/* */
|
514
|
+
/* Interface::var_value */
|
515
|
+
/* */
|
516
|
+
/*******************************************************************/
|
517
|
+
|
518
|
+
// Returns a copy of the transducer stored for the variable "name".
// Aborts via error2() if the variable is undefined.
// The name string is freed with free().
Transducer *Interface::var_value( char *name )
{
  VarMap::iterator entry = VM.find(name);
  if (entry == VM.end()) {
    error2("undefined variable", name);
  }
  free(name);

  Transducer &value_copy = entry->second->copy();
  return &value_copy;
}
|
527
|
+
|
528
|
+
|
529
|
+
/*******************************************************************/
|
530
|
+
/* */
|
531
|
+
/* Interface::rvar_value */
|
532
|
+
/* */
|
533
|
+
/*******************************************************************/
|
534
|
+
|
535
|
+
// Registers "name" in the set RS (storing a duplicate of the string
// if it is not present yet) and returns a transducer with a single
// arc labelled with the symbol code of the name on both levels.
// The name string is consumed: symbol_code frees it.
Transducer *Interface::rvar_value( char *name )
{
  const bool already_registered = (RS.find(name) != RS.end());
  if (!already_registered)
    RS.insert(fst_strdup(name));

  // the same one-element list serves as lower and upper range;
  // new_transducer frees it only once in that case
  Range *single = add_value(symbol_code(name), NULL);
  return new_transducer(single, single);
}
|
543
|
+
|
544
|
+
|
545
|
+
/*******************************************************************/
|
546
|
+
/* */
|
547
|
+
/* Interface::explode */
|
548
|
+
/* */
|
549
|
+
/*******************************************************************/
|
550
|
+
|
551
|
+
// Replaces all agreement variables which were registered in RS
// (transducer variables) and RSS (range variables) since the last
// call.  Returns t unchanged if both sets are empty.
//
// For each variable, one variant of the transducer is created per
// alternative (per path of the variable's transducer, or per
// character of the variable's range) and the variants are combined
// by disjunction.  Both sets are cleared.  The argument transducer is
// consumed; the returned transducer is owned by the caller.
Transducer *Interface::explode( Transducer *t )
{
  if (RS.size() == 0 && RSS.size() == 0)
    return t;

  t = minimise(t);

  vector<char*> name;
  for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
    name.push_back(*it);
  RS.clear();

  // replace all agreement variables
  for( size_t i=0; i<name.size(); i++ ) {
    Transducer *nt = NULL;
    // the label must be computed before var_value frees name[i]
    Label l((Character)TheAlphabet.symbol2code(name[i]));
    Transducer *vt=var_value(name[i]);

    // enumerate all paths of the transducer
    vector<Transducer*> paths;
    vt->enumerate_paths(paths);
    delete vt;

    // insert each path
    for( size_t k=0; k<paths.size(); k++ ) {

      // insertion
      Transducer *t1 = &t->splice(l, paths[k]);
      delete paths[k];

      if (nt == NULL)
        nt = t1;
      else
        nt = disjunction(nt, t1);
    }

    // A variable without any path yields no variant at all.  Use an
    // empty transducer as the result of the empty disjunction rather
    // than continuing with a NULL pointer.
    if (nt == NULL)
      nt = new Transducer();

    delete t;
    t = nt;
  }

  name.clear();
  for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
    name.push_back(*it);
  RSS.clear();

  // replace all agreement variables
  for( size_t i=0; i<name.size(); i++ ) {
    Transducer *nt = NULL;
    // the code must be computed before svar_value frees name[i]
    Character c=(Character)TheAlphabet.symbol2code(name[i]);
    Range *r=svar_value(name[i]);

    // insert each character
    while (r != NULL) {

      // insertion
      Transducer *t1 = &t->replace_char(c, r->character);

      if (nt == NULL)
        nt = t1;
      else
        nt = disjunction(nt, t1);

      // release the consumed node of the copied range list
      Range *next = r->next;
      delete r;
      r = next;
    }

    // same guard as above for a variable with an empty range
    if (nt == NULL)
      nt = new Transducer();

    delete t;
    t = nt;
  }

  return t;
}
|
623
|
+
|
624
|
+
|
625
|
+
/*******************************************************************/
|
626
|
+
/* */
|
627
|
+
/* Interface::catenate */
|
628
|
+
/* */
|
629
|
+
/*******************************************************************/
|
630
|
+
|
631
|
+
// Returns the concatenation (operator+) of the two transducers.
// Both arguments are deleted.
Transducer *Interface::catenate( Transducer *t1, Transducer *t2 )
{
  Transducer &joined = *t1 + *t2;
  delete t1;
  delete t2;
  return &joined;
}
|
639
|
+
|
640
|
+
|
641
|
+
/*******************************************************************/
|
642
|
+
/* */
|
643
|
+
/* Interface::add_range */
|
644
|
+
/* */
|
645
|
+
/*******************************************************************/
|
646
|
+
|
647
|
+
// Prepends the range r to the list of ranges l and returns the new
// head node.  The node is allocated with new.
Ranges *Interface::add_range( Range *r, Ranges *l )
{
  Ranges *head = new Ranges;
  head->next = l;
  head->range = r;
  return head;
}
|
655
|
+
|
656
|
+
|
657
|
+
/*******************************************************************/
|
658
|
+
/* */
|
659
|
+
/* Interface::make_mapping */
|
660
|
+
/* */
|
661
|
+
/*******************************************************************/
|
662
|
+
|
663
|
+
// Builds a transducer which maps a sequence of lower ranges (list1)
// to a sequence of upper ranges (list2), one node per position.
// While both lists have elements, every lower character is paired
// with every upper character of the same position.  Once one list is
// exhausted, the remaining ranges of the other list are paired with
// Label::epsilon.  The last node is made final.
// The Ranges nodes of both lists are freed before returning.
// NOTE(review): free_values(Ranges*) deletes only the Ranges nodes;
// the Range lists they refer to appear not to be released here --
// confirm whether they are owned elsewhere.
Transducer *Interface::make_mapping( Ranges *list1, Ranges *list2 )
{
  Transducer *t=new Transducer();
  Node *node=t->root_node();

  Ranges *lower=list1;
  Ranges *upper=list2;
  while (lower != NULL || upper != NULL) {
    Node *nn=t->new_node();

    if (lower != NULL && upper != NULL) {
      // both sides present: full cross product of the two ranges
      for( Range *r1=lower->range; r1; r1=r1->next )
        for( Range *r2=upper->range; r2; r2=r2->next )
          node->add_arc( Label(r1->character, r2->character), nn, t );
    }
    else if (lower != NULL) {
      // upper side exhausted
      for( Range *r1=lower->range; r1; r1=r1->next )
        node->add_arc( Label(r1->character, Label::epsilon), nn, t );
    }
    else {
      // lower side exhausted
      for( Range *r2=upper->range; r2; r2=r2->next )
        node->add_arc( Label(Label::epsilon, r2->character), nn, t );
    }

    node = nn;
    if (lower != NULL)
      lower = lower->next;
    if (upper != NULL)
      upper = upper->next;
  }
  node->set_final(1);

  free_values(list1);
  free_values(list2);
  return t;
}
|
700
|
+
|
701
|
+
|
702
|
+
/*******************************************************************/
|
703
|
+
/* */
|
704
|
+
/* Interface::disjunction */
|
705
|
+
/* */
|
706
|
+
/*******************************************************************/
|
707
|
+
|
708
|
+
// Returns the disjunction (operator|) of the two transducers.
// Both arguments are deleted.
Transducer *Interface::disjunction( Transducer *t1, Transducer *t2 )
{
  Transducer &either = *t1 | *t2;
  delete t1;
  delete t2;
  return &either;
}
|
716
|
+
|
717
|
+
|
718
|
+
/*******************************************************************/
|
719
|
+
/* */
|
720
|
+
/* Interface::conjunction */
|
721
|
+
/* */
|
722
|
+
/*******************************************************************/
|
723
|
+
|
724
|
+
// Returns the conjunction (operator&) of the two transducers.
// Both arguments are deleted.  A warning is printed on cerr if
// agreement variables are still pending in RS or RSS.
Transducer *Interface::conjunction( Transducer *t1, Transducer *t2 )
{
  const bool no_pending_vars = (RS.size() == 0 && RSS.size() == 0);
  if (!no_pending_vars)
    cerr << "\nWarning: agreement operation inside of conjunction!\n";

  Transducer &both = *t1 & *t2;
  delete t1;
  delete t2;
  return &both;
}
|
734
|
+
|
735
|
+
|
736
|
+
/*******************************************************************/
|
737
|
+
/* */
|
738
|
+
/* Interface::subtraction */
|
739
|
+
/* */
|
740
|
+
/*******************************************************************/
|
741
|
+
|
742
|
+
// Returns the difference (operator/) of the two transducers.
// TheAlphabet is copied into the alphabet of t1 before the
// operation.  Both arguments are deleted.  A warning is printed on
// cerr if agreement variables are still pending in RS or RSS.
Transducer *Interface::subtraction( Transducer *t1, Transducer *t2 )
{
  t1->alphabet.copy(TheAlphabet);

  if (RS.size() > 0 || RSS.size() > 0)
    // name the operation which is actually being performed
    cerr << "\nWarning: agreement operation inside of subtraction!\n";
  Transducer *t = &(*t1 / *t2);
  delete t1;
  delete t2;
  return t;
}
|
754
|
+
|
755
|
+
|
756
|
+
/*******************************************************************/
|
757
|
+
/* */
|
758
|
+
/* Interface::composition */
|
759
|
+
/* */
|
760
|
+
/*******************************************************************/
|
761
|
+
|
762
|
+
// Returns the composition (operator||) of the two transducers.
// Both arguments are deleted.  A warning is printed on cerr if
// agreement variables are still pending in RS or RSS.
Transducer *Interface::composition( Transducer *t1, Transducer *t2 )
{
  const bool no_pending_vars = (RS.size() == 0 && RSS.size() == 0);
  if (!no_pending_vars)
    cerr << "\nWarning: agreement operation inside of composition!\n";

  Transducer &composed = *t1 || *t2;
  delete t1;
  delete t2;
  return &composed;
}
|
772
|
+
|
773
|
+
/*******************************************************************/
|
774
|
+
/* */
|
775
|
+
/* Interface::freely_insert */
|
776
|
+
/* */
|
777
|
+
/*******************************************************************/
|
778
|
+
|
779
|
+
// Returns the result of Transducer::freely_insert for the label
// lc:uc applied to t.
// NOTE(review): unlike the other unary operations in this file, t is
// not deleted here.  If Transducer::freely_insert returns a new
// object, t is leaked -- confirm against its implementation.
Transducer *Interface::freely_insert( Transducer *t,
                                      Character lc, Character uc )
{
  const Label inserted(lc,uc);
  Transducer &result = t->freely_insert(inserted);
  return &result;
}
|
784
|
+
|
785
|
+
|
786
|
+
/*******************************************************************/
|
787
|
+
/* */
|
788
|
+
/* Interface::negation */
|
789
|
+
/* */
|
790
|
+
/*******************************************************************/
|
791
|
+
|
792
|
+
// Returns the negation (operator!) of the transducer t with respect
// to TheAlphabet, whose character pairs replace those of t before
// the operation.  The argument is deleted.
// Prints a warning on cerr if agreement variables are still pending
// and aborts via error() if no alphabet has been defined.
Transducer *Interface::negation( Transducer *t )
{
  const bool no_pending_vars = (RS.size() == 0 && RSS.size() == 0);
  if (!no_pending_vars)
    cerr << "\nWarning: agreement operation inside of negation!\n";
  if (!Alphabet_Defined)
    error("Negation requires the definition of an alphabet");

  t->alphabet.clear_char_pairs();
  t->alphabet.copy(TheAlphabet);

  Transducer &negated = !*t;
  delete t;
  return &negated;
}
|
805
|
+
|
806
|
+
|
807
|
+
/*******************************************************************/
|
808
|
+
/* */
|
809
|
+
/* Interface::upper_level */
|
810
|
+
/* */
|
811
|
+
/*******************************************************************/
|
812
|
+
|
813
|
+
// Returns the result of Transducer::upper_level for t.
// The argument is deleted.
Transducer *Interface::upper_level( Transducer *t )
{
  Transducer &upper = t->upper_level();
  delete t;
  return &upper;
}
|
820
|
+
|
821
|
+
|
822
|
+
/*******************************************************************/
|
823
|
+
/* */
|
824
|
+
/* Interface::lower_level */
|
825
|
+
/* */
|
826
|
+
/*******************************************************************/
|
827
|
+
|
828
|
+
// Returns the result of Transducer::lower_level for t.
// The argument is deleted.
Transducer *Interface::lower_level( Transducer *t )
{
  Transducer &lower = t->lower_level();
  delete t;
  return &lower;
}
|
835
|
+
|
836
|
+
|
837
|
+
/*******************************************************************/
|
838
|
+
/* */
|
839
|
+
/* Interface::minimise */
|
840
|
+
/* */
|
841
|
+
/*******************************************************************/
|
842
|
+
|
843
|
+
// Returns the minimised version of t.  TheAlphabet is copied into
// the alphabet of t before minimisation.  The argument is deleted.
Transducer *Interface::minimise( Transducer *t )
{
  t->alphabet.copy(TheAlphabet);

  Transducer &minimal = t->minimise( Verbose );
  delete t;
  return &minimal;
}
|
851
|
+
|
852
|
+
|
853
|
+
/*******************************************************************/
|
854
|
+
/* */
|
855
|
+
/* Interface::switch_levels */
|
856
|
+
/* */
|
857
|
+
/*******************************************************************/
|
858
|
+
|
859
|
+
// Returns the result of Transducer::switch_levels for t.
// The argument is deleted.
Transducer *Interface::switch_levels( Transducer *t )
{
  Transducer &switched = t->switch_levels();
  delete t;
  return &switched;
}
|
866
|
+
|
867
|
+
|
868
|
+
/*******************************************************************/
|
869
|
+
/* */
|
870
|
+
/* Interface::repetition */
|
871
|
+
/* */
|
872
|
+
/*******************************************************************/
|
873
|
+
|
874
|
+
// Returns the Kleene star of t.  The argument is deleted.
Transducer *Interface::repetition( Transducer *t )
{
  Transducer &star = t->kleene_star();
  delete t;
  return &star;
}
|
881
|
+
|
882
|
+
|
883
|
+
/*******************************************************************/
|
884
|
+
/* */
|
885
|
+
/* Interface::repetition2 */
|
886
|
+
/* */
|
887
|
+
/*******************************************************************/
|
888
|
+
|
889
|
+
// Returns t concatenated with its own Kleene star, i.e. one or more
// repetitions of t.  The argument and the intermediate Kleene star
// transducer are deleted.
Transducer *Interface::repetition2( Transducer *t )
{
  Transducer &star = t->kleene_star();
  Transducer &one_or_more = *t + star;
  delete t;
  delete &star;
  return &one_or_more;
}
|
898
|
+
|
899
|
+
|
900
|
+
/*******************************************************************/
|
901
|
+
/* */
|
902
|
+
/* Interface::optional */
|
903
|
+
/* */
|
904
|
+
/*******************************************************************/
|
905
|
+
|
906
|
+
// Returns a copy of t whose root node is final, so that the empty
// string is accepted as well.  The argument is deleted.
Transducer *Interface::optional( Transducer *t )
{
  Transducer &opt = t->copy();
  opt.root_node()->set_final(1);
  delete t;
  return &opt;
}
|
914
|
+
|
915
|
+
|
916
|
+
/*******************************************************************/
|
917
|
+
/* */
|
918
|
+
/* Interface::add_pi_transitions */
|
919
|
+
/* */
|
920
|
+
/*******************************************************************/
|
921
|
+
|
922
|
+
// Adds one self-loop arc on `node` for every label contained in `alph`.
// The transducer t is passed through to Node::add_arc.
void Interface::add_pi_transitions( Transducer *t, Node *node, Alphabet &alph)
{
  Alphabet::const_iterator it = alph.begin();
  while (it != alph.end()) {
    Label l = *it;
    node->add_arc( l, node, t );
    it++;
  }
}
|
930
|
+
|
931
|
+
|
932
|
+
/*******************************************************************/
|
933
|
+
/* */
|
934
|
+
/* Interface::pi_machine */
|
935
|
+
/* */
|
936
|
+
/*******************************************************************/
|
937
|
+
|
938
|
+
// Creates a new transducer whose root node is final and carries a
// self-loop for every label of `alph`.  The caller receives the new
// object.
Transducer *Interface::pi_machine( Alphabet &alph )
{
  Transducer *machine = new Transducer();
  Node *root = machine->root_node();

  root->set_final(1);
  add_pi_transitions( machine, root, alph );
  return machine;
}
|
946
|
+
|
947
|
+
|
948
|
+
/*******************************************************************/
|
949
|
+
/* */
|
950
|
+
/* Interface::empty_string_transducer */
|
951
|
+
/* */
|
952
|
+
/*******************************************************************/
|
953
|
+
|
954
|
+
// Creates a new transducer that has no arcs and whose root node is
// final.
Transducer *Interface::empty_string_transducer( void )
{
  Transducer *epsilon_only = new Transducer();
  Node *root = epsilon_only->root_node();
  root->set_final(1);
  return epsilon_only;
}
|
961
|
+
|
962
|
+
|
963
|
+
/*******************************************************************/
|
964
|
+
/* */
|
965
|
+
/* Interface::cp */
|
966
|
+
/* */
|
967
|
+
/*******************************************************************/
|
968
|
+
|
969
|
+
// Builds the transducer for the given lower/upper ranges via
// make_transducer and prints a warning on stderr for every arc leaving
// its root node whose label is not contained in TheAlphabet.
Transducer *Interface::cp( Range *lower_range, Range *upper_range )
{
  Transducer *pairs = make_transducer(lower_range, upper_range);

  for( ArcsIter p(pairs->root_node()->arcs()); p; p++ ) {
    Arc *arc=p;
    if (TheAlphabet.find(arc->label()) != TheAlphabet.end())
      continue;   // label is known, nothing to report
    fprintf(stderr,"Warning: 2-level rule mapping \"%s\" not defined in alphabet!\n",
            TheAlphabet.write_label(arc->label()));
  }

  return pairs;
}
|
982
|
+
|
983
|
+
|
984
|
+
/*******************************************************************/
|
985
|
+
/* */
|
986
|
+
/* Interface::anti_cp */
|
987
|
+
/* */
|
988
|
+
/*******************************************************************/
|
989
|
+
|
990
|
+
// Builds a two-node transducer (root -> final node) containing an arc
// for every label of TheAlphabet whose lower character lies in
// lower_range and for which the transducer returned by
// cp(lower_range, upper_range) has no arc at its root.  The default
// Label() is handled the same way when Label::epsilon is in lower_range.
Transducer *Interface::anti_cp( Range *lower_range, Range *upper_range )
{
  Transducer *allowed = cp(lower_range, upper_range);
  Transducer *result = new Transducer();
  Node *final_node = result->new_node();

  final_node->set_final(1);

  Alphabet::const_iterator it = TheAlphabet.begin();
  while (it != TheAlphabet.end()) {
    Label l = *it;
    if (in_range(l.lower_char(), lower_range)) {
      if (!allowed->root_node()->target_node(l))
        result->root_node()->add_arc( l, final_node, result );
    }
    it++;
  }

  if (in_range(Label::epsilon, lower_range)) {
    if (!allowed->root_node()->target_node(Label()))
      result->root_node()->add_arc( Label(), final_node, result );
  }

  delete allowed;
  return result;
}
|
1012
|
+
|
1013
|
+
|
1014
|
+
/*******************************************************************/
|
1015
|
+
/* */
|
1016
|
+
/* Interface::twol_right_rule */
|
1017
|
+
/* */
|
1018
|
+
/*******************************************************************/
|
1019
|
+
|
1020
|
+
// Builds the transducer !( (!lc center .*) | (.* center !rc) ), where
// `center` is cp(lower_range, upper_range) and `.*` is
// pi_machine(TheAlphabet).
// Side effects: the alphabets of lc and rc are overwritten with a copy
// of TheAlphabet.  lc and rc themselves are not deleted here.
Transducer *Interface::twol_right_rule( Transducer *lc, Range *lower_range,
                                        Range *upper_range, Transducer *rc )
{
  Transducer *center = cp(lower_range, upper_range);
  Transducer *any = pi_machine(TheAlphabet);

  // first unwanted language: !lc center .*
  lc->alphabet.copy(TheAlphabet);
  Transducer *not_left = &(!*lc);
  Transducer *step = &(*not_left + *center);
  delete not_left;
  Transducer *bad_left = &(*step + *any);
  delete step;

  // second unwanted language: .* center !rc
  rc->alphabet.copy(TheAlphabet);
  Transducer *not_right = &(!*rc);
  step = &(*center + *not_right);
  delete center;
  delete not_right;
  Transducer *bad_right = &(*any + *step);
  delete any;
  delete step;

  // union of both unwanted languages
  Transducer *bad = &(*bad_left | *bad_right);
  delete bad_left;
  delete bad_right;

  // the rule is the complement of the union
  bad->alphabet.copy(TheAlphabet);
  Transducer *rule = &(!*bad);
  delete bad;

  return rule;
}
|
1056
|
+
|
1057
|
+
|
1058
|
+
/*******************************************************************/
|
1059
|
+
/* */
|
1060
|
+
/* Interface::twol_left_rule */
|
1061
|
+
/* */
|
1062
|
+
/*******************************************************************/
|
1063
|
+
|
1064
|
+
// Builds the transducer !( lc anti_cp(lower_range, upper_range) rc ).
// If Label::epsilon lies in lower_range (an insertion such as
// "$L <> <= a $R"), error() is called when lc or rc generates the empty
// string, and a warning is written to cerr.
// lc and rc are not deleted here.
Transducer *Interface::twol_left_rule( Transducer *lc, Range *lower_range,
                                       Range *upper_range, Transducer *rc )
{
  if (in_range(Label::epsilon, lower_range)) {
    if (lc->generates_empty_string())
      error("in two level rule: insertion operation with deletable left context!");
    if (rc->generates_empty_string())
      error("in two level rule: insertion operation with deletable right context!");
    cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
  }

  // the mappings excluded by the rule
  Transducer *excluded = anti_cp(lower_range, upper_range);

  // prepend the left context
  Transducer *with_left = &(*lc + *excluded);
  delete excluded;

  // append the right context
  Transducer *with_both = &(*with_left + *rc);
  delete with_left;

  // form the complement
  with_both->alphabet.copy(TheAlphabet);
  Transducer *rule = &(!*with_both);
  delete with_both;

  return rule;
}
|
1095
|
+
|
1096
|
+
|
1097
|
+
/*******************************************************************/
|
1098
|
+
/* */
|
1099
|
+
/* Interface::make_rule */
|
1100
|
+
/* */
|
1101
|
+
/*******************************************************************/
|
1102
|
+
|
1103
|
+
// Builds a two-level rule transducer of the requested type and returns
// it after minimise().
// A NULL context is replaced by pi_machine(TheAlphabet); otherwise the
// left context is prefixed and the right context suffixed with that
// machine.  lc, rc and the ranges are released before returning
// (lower_range only when it differs from upper_range).
Transducer *Interface::make_rule( Transducer *lc, Range *lower_range,
                                  Twol_Type type, Range *upper_range,
                                  Transducer *rc )
{
  if (RS.size() > 0 || RSS.size() > 0)
    cerr << "\nWarning: agreement operation inside of replacement rule!\n";

  if (!Alphabet_Defined)
    error("Two level rules require the definition of an alphabet");

  // expand the left and the right contexts to their full length
  Transducer *any = pi_machine(TheAlphabet);

  if (lc != NULL) {
    Transducer *extended = &(*any + *lc);
    delete lc;
    lc = extended;
  }
  else
    lc = pi_machine(TheAlphabet);

  if (rc != NULL) {
    Transducer *extended = &(*rc + *any);
    delete rc;
    rc = extended;
  }
  else
    rc = pi_machine(TheAlphabet);

  delete any;

  Transducer *result = NULL;

  if (type == twol_left)
    result = twol_left_rule(lc, lower_range, upper_range, rc);
  else if (type == twol_right)
    result = twol_right_rule(lc, lower_range, upper_range, rc);
  else if (type == twol_both) {
    Transducer *left_part = twol_left_rule(lc, lower_range, upper_range, rc);
    Transducer *right_part = twol_right_rule(lc, lower_range, upper_range, rc);
    result = &(*left_part & *right_part);
    delete left_part;
    delete right_part;
  }

  delete lc;
  delete rc;

  // both range arguments may be the same object; free it only once
  if (lower_range != upper_range)
    free_values(lower_range);
  free_values(upper_range);

  return minimise(result);
}
|
1158
|
+
|
1159
|
+
|
1160
|
+
/*******************************************************************/
|
1161
|
+
/* */
|
1162
|
+
/* Interface::make_context */
|
1163
|
+
/* */
|
1164
|
+
/*******************************************************************/
|
1165
|
+
|
1166
|
+
// Allocates a single Contexts entry holding the given left and right
// transducers.  A NULL argument is replaced by
// empty_string_transducer().  The `next` link is set to NULL.
Contexts *Interface::make_context( Transducer *l, Transducer *r )
{
  Transducer *left = (l != NULL) ? l : empty_string_transducer();
  Transducer *right = (r != NULL) ? r : empty_string_transducer();

  Contexts *entry = new Contexts();
  entry->left = left;
  entry->right = right;
  entry->next = NULL;

  return entry;
}
|
1181
|
+
|
1182
|
+
|
1183
|
+
/*******************************************************************/
|
1184
|
+
/* */
|
1185
|
+
/* Interface::add_context */
|
1186
|
+
/* */
|
1187
|
+
/*******************************************************************/
|
1188
|
+
|
1189
|
+
// Links nc in front of the context list c (nc->next = c) and returns
// nc, which is the head of the combined list.
Contexts *Interface::add_context( Contexts *nc, Contexts *c )
{
  Contexts *head = nc;
  head->next = c;
  return head;
}
|
1195
|
+
|
1196
|
+
|
1197
|
+
/*******************************************************************/
|
1198
|
+
/* */
|
1199
|
+
/* Interface::restriction_transducer */
|
1200
|
+
/* */
|
1201
|
+
/*******************************************************************/
|
1202
|
+
|
1203
|
+
// Computes !( (l1 / l2) with `marker` replaced by Label::epsilon ).
// Side effect: l1's alphabet is overwritten with a copy of TheAlphabet.
// Neither l1 nor l2 is deleted here.
Transducer *Interface::restriction_transducer( Transducer *l1, Transducer *l2,
                                               Character marker )
{
  l1->alphabet.copy(TheAlphabet);
  Transducer *reduced = &(*l1 / *l2);

  Transducer *unmarked = &reduced->replace_char(marker, Label::epsilon);
  delete reduced;

  unmarked->alphabet.copy(TheAlphabet);
  Transducer *complement = &(!*unmarked);
  delete unmarked;

  return complement;
}
|
1218
|
+
|
1219
|
+
|
1220
|
+
/*******************************************************************/
|
1221
|
+
/* */
|
1222
|
+
/* Interface::marker_transducer */
|
1223
|
+
/* */
|
1224
|
+
/*******************************************************************/
|
1225
|
+
|
1226
|
+
// Obtains a new marker from TheAlphabet (stored in the out-parameter
// `marker`) and returns one_label_transducer(Label(marker)).  The
// symbols of t's alphabet and of every left/right context in the list c
// are inserted into the alphabet of the returned transducer.
Transducer *Interface::marker_transducer( Transducer *t, Contexts *c,
                                          Character &marker )
{
  marker = TheAlphabet.new_marker();
  Transducer *mt = one_label_transducer( Label(marker) );

  // build the alphabet with a new marker
  mt->alphabet.insert_symbols(t->alphabet);
  for( Contexts *cur = c; cur; cur = cur->next ) {
    mt->alphabet.insert_symbols(cur->left->alphabet);
    mt->alphabet.insert_symbols(cur->right->alphabet);
  }

  return mt;
}
|
1242
|
+
|
1243
|
+
|
1244
|
+
/*******************************************************************/
|
1245
|
+
/* */
|
1246
|
+
/* Interface::center_transducer */
|
1247
|
+
/* */
|
1248
|
+
/*******************************************************************/
|
1249
|
+
|
1250
|
+
// Creates the concatenation pi + mt + t + mt + pi.  None of the
// arguments is deleted; only the intermediate results are.
Transducer *Interface::center_transducer( Transducer *t, Transducer *pi,
                                          Transducer *mt )
{
  Transducer *acc = &(*pi + *mt);          // pi mt

  Transducer *next = &(*acc + *t);         // pi mt t
  delete acc;

  acc = &(*next + *mt);                    // pi mt t mt
  delete next;

  next = &(*acc + *pi);                    // pi mt t mt pi
  delete acc;

  return next;
}
|
1263
|
+
|
1264
|
+
|
1265
|
+
/*******************************************************************/
|
1266
|
+
/* */
|
1267
|
+
/* Interface::context_transducer */
|
1268
|
+
/* */
|
1269
|
+
/*******************************************************************/
|
1270
|
+
|
1271
|
+
// Builds, for every entry i of the context list c, the concatenation
//
//     pi + left[i] + mt + t + mt + right[i] + pi
//
// and returns the union (operator|) of all of them.  Returns NULL when
// c is NULL.  None of the arguments is deleted; the context list is only
// read.
Transducer *Interface::context_transducer( Transducer *t, Transducer *pi,
                                           Transducer *mt, Contexts *c )
{
  // mt + t + mt is shared by all contexts, so build it once
  Transducer *lead = &(*mt + *t);
  Transducer *core = &(*lead + *mt);
  delete lead;

  Transducer *result = NULL;

  for( ; c; c = c->next ) {
    Transducer *a = &(*pi + *c->left);
    Transducer *b = &(*a + *core);
    delete a;
    a = &(*b + *c->right);
    delete b;
    b = &(*a + *pi);
    delete a;

    if (result == NULL)
      result = b;
    else {
      Transducer *merged = &(*result | *b);
      delete b;
      // operator| yields a separate object, so the previous union must be
      // released as well; otherwise one transducer leaks per extra context
      delete result;
      result = merged;
    }
  }
  delete core;

  return result;
}
|
1304
|
+
|
1305
|
+
|
1306
|
+
|
1307
|
+
/*******************************************************************/
|
1308
|
+
/* */
|
1309
|
+
/* Interface::result_transducer */
|
1310
|
+
/* */
|
1311
|
+
/*******************************************************************/
|
1312
|
+
|
1313
|
+
// Selects the restriction transducer(s) for the given rule type:
//   twol_right : restriction_transducer(l1, l2, marker)
//   twol_left  : restriction_transducer(l2, l1, marker)
//   twol_both  : the operator& combination of both
// Any other type yields NULL.
Transducer *Interface::result_transducer( Transducer *l1, Transducer *l2,
                                          Twol_Type type, Character marker )
{
  switch (type) {
  case twol_right:
    return restriction_transducer( l1, l2, marker );

  case twol_left:
    return restriction_transducer( l2, l1, marker );

  case twol_both:
    {
      Transducer *forward = restriction_transducer( l1, l2, marker );
      Transducer *backward = restriction_transducer( l2, l1, marker );
      Transducer *both = &(*forward & *backward);
      delete forward;
      delete backward;
      return both;
    }

  default:
    return NULL;
  }
}
|
1331
|
+
|
1332
|
+
|
1333
|
+
/*******************************************************************/
|
1334
|
+
/* */
|
1335
|
+
/* Interface::restriction */
|
1336
|
+
/* */
|
1337
|
+
/*******************************************************************/
|
1338
|
+
|
1339
|
+
// Builds the restriction transducer for t under the context list c.
//   direction == 0 : the context center is pi_machine(TheAlphabet)
//   direction == 1 : the context center is  _t || .*
//   otherwise      : the context center is  .* || ^t
// t is deleted and the context list is released with free_contexts().
Transducer *Interface::restriction( Transducer *t, Twol_Type type,
                                    Contexts *c, int direction )
{
  Character marker;
  Transducer *mt=marker_transducer( t, c, marker );
  Transducer *pi=pi_machine(TheAlphabet);
  Transducer *l1=center_transducer( t, pi, mt );

  Transducer *mid;
  switch (direction) {
  case 0:
    mid = pi;
    break;
  case 1:
    {
      // compute  _t || .*
      Transducer *low = &t->lower_level();
      mid = &(*low || *pi);
      delete low;
    }
    break;
  default:
    {
      // compute  ^t || .*
      Transducer *up = &t->upper_level();
      mid = &(*pi || *up);
      delete up;
    }
    break;
  }
  delete t;

  Transducer *l2=context_transducer( mid, pi, mt, c );
  // mid aliases pi when direction == 0; avoid deleting it twice
  if (mid != pi)
    delete mid;
  delete pi;
  delete mt;

  Transducer *result=result_transducer( l1, l2, type, marker );
  delete l1;
  delete l2;

  free_contexts( c );

  return result;
}
|
1378
|
+
|
1379
|
+
|
1380
|
+
/*******************************************************************/
|
1381
|
+
/* */
|
1382
|
+
/* Interface::insert_boundary_transducer */
|
1383
|
+
/* */
|
1384
|
+
/*******************************************************************/
|
1385
|
+
|
1386
|
+
// Creates the insert boundaries transducer (.|<>:<L>|<>:<R>)* :
// pi_machine(alph) plus two root self-loops labelled
// Label(Label::epsilon, leftm) and Label(Label::epsilon, rightm).
Transducer *Interface::insert_boundary_transducer( Character leftm, Character rightm,
                                                   Alphabet &alph )
{
  Transducer *inserter = pi_machine( alph );
  Node *start = inserter->root_node();

  start->add_arc( Label(Label::epsilon, leftm), start, inserter);
  start->add_arc( Label(Label::epsilon, rightm), start, inserter);

  return inserter;
}
|
1398
|
+
|
1399
|
+
|
1400
|
+
/*******************************************************************/
|
1401
|
+
/* */
|
1402
|
+
/* Interface::remove_boundary_transducer */
|
1403
|
+
/* */
|
1404
|
+
/*******************************************************************/
|
1405
|
+
|
1406
|
+
// Creates the remove boundaries transducer (.|<L>:<>|<R>:<>)* :
// pi_machine(alph) plus two root self-loops labelled
// Label(leftm, Label::epsilon) and Label(rightm, Label::epsilon).
Transducer *Interface::remove_boundary_transducer( Character leftm, Character rightm,
                                                   Alphabet &alph )
{
  Transducer *remover = pi_machine( alph );
  Node *start = remover->root_node();

  start->add_arc( Label(leftm, Label::epsilon), start, remover);
  start->add_arc( Label(rightm, Label::epsilon), start, remover);

  return remover;
}
|
1418
|
+
|
1419
|
+
|
1420
|
+
/*******************************************************************/
|
1421
|
+
/* */
|
1422
|
+
/* Interface::constrain_boundary_transducer */
|
1423
|
+
/* */
|
1424
|
+
/*******************************************************************/
|
1425
|
+
|
1426
|
+
// Creates the transducer !((.|<L>|<R>)* <L><R> (.|<L>|<R>)*), i.e. the
// complement of "somewhere <L> is directly followed by <R>".
Transducer *Interface::constrain_boundary_transducer( Character leftm,
                                                      Character rightm,
                                                      Alphabet &alph)
{
  // (.|<L>|<R>)*
  Transducer *pattern = pi_machine( alph );

  // extend it to (.|<L>|<R>)* <L><R> (.|<L>|<R>)*
  Node *start = pattern->root_node();
  Node *middle = pattern->new_node();
  Node *finish = pattern->new_node();

  start->set_final(0);
  finish->set_final(1);

  start->add_arc( Label(leftm), middle, pattern);
  middle->add_arc( Label(rightm), finish, pattern);

  add_pi_transitions( pattern, finish, alph );

  // complement
  pattern->alphabet.copy(alph);
  Transducer *complement = &(!*pattern);
  delete pattern;

  return complement;
}
|
1454
|
+
|
1455
|
+
|
1456
|
+
/*******************************************************************/
|
1457
|
+
/* */
|
1458
|
+
/* Interface::extended_left_transducer */
|
1459
|
+
/* */
|
1460
|
+
/*******************************************************************/
|
1461
|
+
|
1462
|
+
// Builds the extended left context transducer
//     .* (<R> >> (<L> >> $T$)) || !(.*<L>)
// where <L> is m1 and <R> is m2.  A NULL context yields
// pi_machine(alpha).  A non-NULL t is deleted.
Transducer *Interface::extended_left_transducer( Transducer *t, Character m1,
                                                 Character m2, Alphabet &alpha )
{
  if (t == NULL) // empty context
    return pi_machine(alpha);

  // <R> >> (<L> >> $T$)
  Transducer *with_m1 = &t->freely_insert( Label(m1) );
  delete t;
  Transducer *context = &with_m1->freely_insert( Label(m2) );
  delete with_m1;

  // .* (<R> >> (<L> >> $T$))
  add_pi_transitions( context, context->root_node(), alpha );

  // !(.*<L>)
  Transducer *ends_in_m1 = one_label_transducer(Label(m1));
  add_pi_transitions( ends_in_m1, ends_in_m1->root_node(), alpha );
  ends_in_m1->alphabet.copy(alpha);
  Transducer *not_ends_in_m1 = &(!*ends_in_m1);
  delete ends_in_m1;

  // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
  Transducer *combined = &(*context || *not_ends_in_m1);
  delete context;
  delete not_ends_in_m1;

  return combined;
}
|
1493
|
+
|
1494
|
+
|
1495
|
+
/*******************************************************************/
|
1496
|
+
/* */
|
1497
|
+
/* Interface::left_context */
|
1498
|
+
/* */
|
1499
|
+
/*******************************************************************/
|
1500
|
+
|
1501
|
+
// Builds the left context constraint
//     !( (!ct mt) | (ct !mt) )
// where ct = extended_left_transducer(t, leftm, rightm, alph) and
// mt = <R>* <L> .* .  Ownership of t passes to
// extended_left_transducer.
Transducer *Interface::left_context( Transducer *t, Character leftm,
                                     Character rightm, Alphabet &alph )
{
  // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
  Transducer *ct = extended_left_transducer(t, leftm, rightm, alph);

  // <L>
  Transducer *mt = one_label_transducer(Label(leftm));
  Node *mt_root = mt->root_node();
  // <R>* <L>
  mt_root->add_arc(Label(rightm), mt->root_node(), mt );
  // <R>* <L> .*
  add_pi_transitions(mt, mt->root_node()->target_node(Label(leftm)), alph);

  ct->alphabet.copy( alph );
  Transducer *no_ct = &(!*ct);

  mt->alphabet.copy(alph);
  Transducer *no_mt = &(!*mt);

  // marker present although the context does not match
  Transducer *marker_without_ctx = &(*no_ct + *mt);
  delete no_ct;
  delete mt;

  // context matches although the marker is missing
  Transducer *ctx_without_marker = &(*ct + *no_mt);
  delete ct;
  delete no_mt;

  Transducer *violations = &(*marker_without_ctx | *ctx_without_marker);
  delete marker_without_ctx;
  delete ctx_without_marker;

  violations->alphabet.copy( alph );
  Transducer *constraint = &(!*violations);
  delete violations;

  return constraint;
}
|
1538
|
+
|
1539
|
+
|
1540
|
+
/*******************************************************************/
|
1541
|
+
/* */
|
1542
|
+
/* Interface::right_context */
|
1543
|
+
/* */
|
1544
|
+
/*******************************************************************/
|
1545
|
+
|
1546
|
+
// right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
// Implemented by reversing t, calling left_context() with the two
// markers swapped, and reversing the result.  t is deleted.
Transducer *Interface::right_context( Transducer *t, Character leftm,
                                      Character rightm, Alphabet &alph )
{
  Transducer *reversed = &t->reverse();
  delete t;

  Transducer *mirrored = left_context(reversed, rightm, leftm, alph);

  Transducer *constraint = &mirrored->reverse();
  delete mirrored;

  return constraint;
}
|
1557
|
+
|
1558
|
+
|
1559
|
+
/*******************************************************************/
|
1560
|
+
/* */
|
1561
|
+
/* Interface::make_optional */
|
1562
|
+
/* */
|
1563
|
+
/*******************************************************************/
|
1564
|
+
|
1565
|
+
// Returns t | level(t), where level(t) is t->upper_level() for
// my_repl_down and t->lower_level() for every other type.  t and the
// intermediate level transducer are deleted.
Transducer *Interface::make_optional( Transducer *t, Repl_Type type )
{
  Transducer *level = (type == my_repl_down)
    ? &t->upper_level()
    : &t->lower_level();

  Transducer *combined = &(*t | *level);

  delete t;
  delete level;

  return combined;
}
|
1581
|
+
|
1582
|
+
|
1583
|
+
/*******************************************************************/
|
1584
|
+
/* */
|
1585
|
+
/* Interface::replace */
|
1586
|
+
/* */
|
1587
|
+
/*******************************************************************/
|
1588
|
+
|
1589
|
+
// Computes the unconditional replacement transducer
//     (no_ct ct)* no_ct     with  no_ct = !(.* _ct .*)
// where _ct is the relevant level of ct with the empty string removed.
// Only repl_up and my_repl_down are accepted; any other type is reported
// through error().  ct is deleted (after being passed through
// make_optional() when `optional` is set).
Transducer *Interface::replace( Transducer *ct, Repl_Type type,
                                bool optional )
{
  if (optional)
    ct = make_optional(ct, type);

  // compute the no-center transducer
  Transducer *center = NULL;
  Transducer *any = pi_machine(TheAlphabet);

  if (type == repl_up) {
    // _ct || .*
    Transducer *low = &ct->lower_level();
    center = &(*low || *any);
    delete low;
  }
  else if (type == my_repl_down) {
    // .* || ^ct
    Transducer *up = &ct->upper_level();
    center = &(*any || *up);
    delete up;
  }
  else
    error("Invalid type of replace operator");

  // _ct without empty string
  Transducer *eps = empty_string_transducer();
  Transducer *nonempty = &(*center / *eps);
  delete center;
  delete eps;

  // .* _ct
  Transducer *prefixed = &(*any + *nonempty);
  delete nonempty;

  // .* _ct .*
  Transducer *padded = &(*prefixed + *any);
  delete any;
  delete prefixed;

  // no_ct = !(.* _ct .*)
  padded->alphabet.copy(TheAlphabet);
  Transducer *no_ct = &(!*padded);
  delete padded;

  // compute the unconditional replacement transducer

  // no-ct ct
  Transducer *unit = &(*no_ct + *ct);
  delete ct;

  // (no-ct ct)*
  Transducer *repeated = &(unit->kleene_star());
  delete unit;

  // (no-ct ct)* no-ct
  Transducer *replacement = &(*repeated + *no_ct);
  delete repeated;
  delete no_ct;

  return replacement;
}
|
1654
|
+
|
1655
|
+
|
1656
|
+
/*******************************************************************/
|
1657
|
+
/* */
|
1658
|
+
/* Interface::replace_transducer */
|
1659
|
+
/* */
|
1660
|
+
/*******************************************************************/
|
1661
|
+
|
1662
|
+
// Wraps the center transducer in boundary markers,
//     $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
// and returns replace($CenterB$, type, false).  ct is deleted.
Transducer *Interface::replace_transducer( Transducer *ct, Character lm,
                                           Character rm, Repl_Type type )
{
  // insert boundary markers into the center transducer

  // <L> >> (<R> >> $Center$)
  Transducer *with_lm = &ct->freely_insert(Label(lm));
  delete ct;
  Transducer *marked = &with_lm->freely_insert(Label(rm));
  delete with_lm;

  // add surrounding boundary markers to the center transducer

  // <L> (<L> >> (<R> >> $Center$))
  Transducer *boundary = one_label_transducer( Label(lm) );
  Transducer *opened = &(*boundary + *marked);
  delete boundary;
  delete marked;

  // $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
  boundary = one_label_transducer( Label(rm) );
  Transducer *enclosed = &(*opened + *boundary);
  delete opened;
  delete boundary;

  return replace(enclosed, type, false);
}
|
1689
|
+
|
1690
|
+
|
1691
|
+
/*******************************************************************/
|
1692
|
+
/* */
|
1693
|
+
/* Interface::replace_in_context */
|
1694
|
+
/* */
|
1695
|
+
/*******************************************************************/
|
1696
|
+
|
1697
|
+
// Helper for replace_in_context: combines *first with *second using
// operator||, deletes both operands and returns the address of the
// combination result.
static Transducer *join_and_release( Transducer *first, Transducer *second )
{
  Transducer *joined = &(*first || *second);
  delete first;
  delete second;
  return joined;
}

// Builds the conditional replacement transducer for t in the context c.
// The implementation of the replace operators is based on
// "The Replace Operator" by Lauri Karttunen.
//
// The stages are combined with operator|| in this order:
//   insert boundaries, [constrain boundaries], [lower-side contexts],
//   replacement, [upper-side contexts], [constrain boundaries],
//   remove boundaries
// where the bracketed stages depend on `type`.
//
// Side effects: two markers are taken from TheAlphabet, inserted into it
// and removed again with delete_markers(); t is consumed and the context
// list is released with free_contexts().
Transducer *Interface::replace_in_context( Transducer *t, Repl_Type type,
                                           Contexts *c, bool optional )
{
  if (optional)
    t = make_optional(t, type);

  if (!Alphabet_Defined)
    error("The replace operators require the definition of an alphabet");

  if (!c->left->is_automaton() || !c->right->is_automaton())
    error("The replace operators require automata as context expressions! (Do not include any character mappings x:y between the two parentheses of the operator.)");

  const bool downward = (type == my_repl_down);

  // warn if the source side of the replacement contains the empty string
  {
    Transducer *eps = empty_string_transducer();
    Transducer *probe = downward ? &(*t || *eps) : &(*eps || *t);
    if (!probe->is_empty())
      cerr << "\nWarning: The source of the replace operation contains the empty string! (Such insertion operations do not work.)\n";
    delete eps;
    delete probe;
  }

  // create the marker symbols
  Character leftm = TheAlphabet.new_marker();
  Character rightm = TheAlphabet.new_marker();

  // create the upper and lower alphabets
  Alphabet lower_alph;
  lower_alph.copy( TheAlphabet, lower );
  Alphabet upper_alph;
  upper_alph.copy( TheAlphabet, upper );

  // insert boundaries transducer (.|<>:<L>|<>:<R>)*
  Transducer *chain=insert_boundary_transducer( leftm, rightm, lower_alph );

  // remove boundaries transducer (.|<L>:<>|<R>:<>)*
  Transducer *rbt=remove_boundary_transducer( leftm, rightm, upper_alph );

  // Add the markers to the alphabets
  TheAlphabet.insert(Label(leftm));
  TheAlphabet.insert(Label(rightm));
  lower_alph.insert(Label(leftm));
  lower_alph.insert(Label(rightm));
  upper_alph.insert(Label(leftm));
  upper_alph.insert(Label(rightm));

  // unconditional replace transducer
  Transducer *rt =
    replace_transducer( t, leftm, rightm, downward ? my_repl_down : repl_up );

  // build the conditional replacement transducer

  if (!downward)
    // constrain boundaries transducer !(.*<L><R>.*)
    chain = join_and_release( chain,
              constrain_boundary_transducer(leftm, rightm, lower_alph) );

  if (type == repl_up || type == repl_left)
    // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
    chain = join_and_release( chain,
              left_context(c->left, leftm, rightm, lower_alph) );

  if (type == repl_up || type == repl_right)
    // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
    chain = join_and_release( chain,
              right_context(c->right, leftm, rightm, lower_alph) );

  // Apply the replacement transducer
  chain = join_and_release( chain, rt );

  if (downward || type == repl_down || type == repl_right)
    // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
    chain = join_and_release( chain,
              left_context(c->left, leftm, rightm, upper_alph) );

  if (downward || type == repl_down || type == repl_left)
    // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
    chain = join_and_release( chain,
              right_context(c->right, leftm, rightm, upper_alph) );

  if (downward)
    // constrain boundaries transducer !(.*<L><R>.*)
    chain = join_and_release( chain,
              constrain_boundary_transducer(leftm, rightm, upper_alph) );

  Transducer *result = join_and_release( chain, rbt );

  // Remove the markers from the alphabet
  TheAlphabet.delete_markers();

  free_contexts( c );

  return result;
}
|
1851
|
+
|
1852
|
+
|
1853
|
+
/*******************************************************************/
|
1854
|
+
/* */
|
1855
|
+
/* Interface::add_alphabet */
|
1856
|
+
/* */
|
1857
|
+
/*******************************************************************/
|
1858
|
+
|
1859
|
+
// Overwrites t's alphabet with a copy of TheAlphabet and then calls
// t->complete_alphabet().
void Interface::add_alphabet( Transducer *t )
{
  Alphabet &target = t->alphabet;
  target.copy(TheAlphabet);
  t->complete_alphabet();
}
|
1865
|
+
|
1866
|
+
|
1867
|
+
/*******************************************************************/
|
1868
|
+
/* */
|
1869
|
+
/* Interface::write_to_file */
|
1870
|
+
/* */
|
1871
|
+
/*******************************************************************/
|
1872
|
+
|
1873
|
+
// Opens `filename` for binary writing, runs t through explode(),
// add_alphabet() and minimise(), and stores the result in the file.
// If the file cannot be opened, an error is printed and the process
// exits with status 1.  `filename` is released with free() once the
// file is open.
void Interface::write_to_file( Transducer *t, char *filename)
{
  FILE *file = fopen(filename,"wb");
  if (file == NULL) {
    fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
    exit(1);
  }
  free( filename );

  Transducer *exploded = explode(t);
  add_alphabet(exploded);
  Transducer *minimal = minimise(exploded);
  minimal->store(file);
  fclose(file);
}
|
1889
|
+
|
1890
|
+
|
1891
|
+
/*******************************************************************/
|
1892
|
+
/* */
|
1893
|
+
/* Interface::result */
|
1894
|
+
/* */
|
1895
|
+
/*******************************************************************/
|
1896
|
+
|
1897
|
+
// Finalises a compiled transducer: explode(), optional switch_levels(),
// add_alphabet() and minimise().  In between, every value stored in the
// variable map VM is deleted, the map is cleared and the key strings
// are released with free().
Transducer *Interface::result( Transducer *t, bool switch_flag )
{
  t = explode(t);

  // delete the variable values; the keys are collected first because
  // they may only be freed after the map itself has been cleared
  vector<char*> names;
  VarMap::iterator it = VM.begin();
  while (it != VM.end()) {
    names.push_back(it->first);
    delete it->second;
    it->second = NULL;
    it++;
  }
  VM.clear();

  for( vector<char*>::iterator n=names.begin(); n != names.end(); n++ )
    free(*n);
  names.clear();

  if (switch_flag)
    t = switch_levels(t);
  add_alphabet(t);
  return minimise(t);
}
|
1920
|
+
|
1921
|
+
}
|