ruby-sfst 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1838 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE interface.C */
4
+ /* MODULE interface */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /*******************************************************************/
9
+
10
+ #include "interface.h"
11
+
12
+ #include <fstream>
13
+ using std::ifstream;
14
+ using std::ofstream;
15
+
16
+ #include <set>
17
+ using std::set;
18
+
19
+ using __gnu_cxx::hash_map;
20
+ using __gnu_cxx::hash;
21
+ using std::cerr;
22
+ using std::cout;
23
+ using std::vector;
24
+
25
// Ordering functor for C strings: yields true exactly when s1 precedes s2
// in strcmp order.
struct ltstr {
  bool operator()(const char* s1, const char* s2) const
  {
    const int order = strcmp(s1, s2);
    return order < 0;
  }
};
29
+
30
// Equality functor for C strings: compares the characters (via strcmp),
// not the pointer values.
struct eqstr {
  bool operator()(const char* s1, const char* s2) const
  {
    if (strcmp(s1, s2) != 0)
      return false;
    return true;
  }
};
34
+
35
+ typedef set<char*, ltstr> RVarSet;
36
+
37
+ typedef hash_map<char*, Transducer*, hash<const char*>, eqstr> VarMap;
38
+
39
+ typedef hash_map<char*, Range*, hash<const char*>, eqstr> SVarMap;
40
+
41
+ bool Verbose=false;
42
+
43
+ Alphabet TheAlphabet;
44
+
45
+ static VarMap VM;
46
+ static SVarMap SVM;
47
+ static RVarSet RS;
48
+ static RVarSet RSS;
49
+ static int Alphabet_Defined=0;
50
+
51
+
52
+ /*******************************************************************/
53
+ /* */
54
+ /* error */
55
+ /* */
56
+ /*******************************************************************/
57
+
58
// Reports a fatal error on cerr and terminates the process with exit
// status 1. Never returns to the caller.
static void error( const char *message )
{
  cerr << "\nError: ";
  cerr << message;
  cerr << "\naborted.\n";
  exit(1);
}
64
+
65
+
66
+ /*******************************************************************/
67
+ /* */
68
+ /* error2 */
69
+ /* */
70
+ /*******************************************************************/
71
+
72
// Reports a fatal error of the form "<message>: <input>" on cerr and
// terminates the process with exit status 1. Never returns to the caller.
void error2( char *message, char *input )
{
  cerr << "\nError: ";
  cerr << message << ": " << input;
  cerr << "\naborted.\n";
  exit(1);
}
78
+
79
+
80
+ /*******************************************************************/
81
+ /* */
82
+ /* symbol_code */
83
+ /* */
84
+ /*******************************************************************/
85
+
86
+ Character symbol_code( char *symbol )
87
+
88
+ {
89
+ int c=TheAlphabet.symbol2code(symbol);
90
+ if (c == EOF)
91
+ c = TheAlphabet.add_symbol( symbol );
92
+ free(symbol);
93
+ return (Character)c;
94
+ }
95
+
96
+
97
+ /*******************************************************************/
98
+ /* */
99
+ /* character_code */
100
+ /* */
101
+ /*******************************************************************/
102
+
103
+ Character character_code( unsigned int uc )
104
+
105
+ {
106
+ if (TheAlphabet.utf8)
107
+ return symbol_code(fst_strdup(int2utf8(uc)));
108
+
109
+ unsigned char *buffer=(unsigned char*)malloc(2);
110
+ buffer[0] = (unsigned char)uc;
111
+ buffer[1] = 0;
112
+ return symbol_code((char*)buffer);
113
+ }
114
+
115
+
116
+ /*******************************************************************/
117
+ /* */
118
+ /* add_value */
119
+ /* */
120
+ /*******************************************************************/
121
+
122
+ Range *add_value( Character c, Range *r )
123
+
124
+ {
125
+ Range *result=new Range;
126
+ result->character = c;
127
+ result->next = r;
128
+ return result;
129
+ }
130
+
131
+
132
+ /*******************************************************************/
133
+ /* */
134
+ /* add_values */
135
+ /* */
136
+ /*******************************************************************/
137
+
138
+ Range *add_values( unsigned int c1, unsigned int c2, Range *r )
139
+
140
+ {
141
+ for( unsigned int c=c2; c>=c1; c-- )
142
+ r = add_value(character_code(c), r);
143
+ return r;
144
+ }
145
+
146
+
147
+ /*******************************************************************/
148
+ /* */
149
+ /* append_values */
150
+ /* */
151
+ /*******************************************************************/
152
+
153
+ Range *append_values( Range *r2, Range *r )
154
+
155
+ {
156
+ if (r2 == NULL)
157
+ return r;
158
+ return add_value(r2->character, append_values(r2->next, r));
159
+ }
160
+
161
+
162
+ /*******************************************************************/
163
+ /* */
164
+ /* add_var_values */
165
+ /* */
166
+ /*******************************************************************/
167
+
168
+ Range *add_var_values( char *name, Range *r )
169
+
170
+ {
171
+ return append_values(svar_value(name), r);
172
+ }
173
+
174
+
175
+ /*******************************************************************/
176
+ /* */
177
+ /* in_range */
178
+ /* */
179
+ /*******************************************************************/
180
+
181
+ static bool in_range( unsigned int c, Range *r )
182
+
183
+ {
184
+ while (r) {
185
+ if (r->character == c)
186
+ return true;
187
+ r = r->next;
188
+ }
189
+ return false;
190
+ }
191
+
192
+
193
+ /*******************************************************************/
194
+ /* */
195
+ /* free_values */
196
+ /* */
197
+ /*******************************************************************/
198
+
199
+ static void free_values( Range *r )
200
+
201
+ {
202
+ while (r) {
203
+ Range *tmp=r;
204
+ r = r->next;
205
+ delete tmp;
206
+ }
207
+ }
208
+
209
+
210
+ /*******************************************************************/
211
+ /* */
212
+ /* free_values */
213
+ /* */
214
+ /*******************************************************************/
215
+
216
+ static void free_values( Ranges *r )
217
+
218
+ {
219
+ while (r) {
220
+ Ranges *tmp=r;
221
+ r = r->next;
222
+ free_values(tmp->range);
223
+ delete tmp;
224
+ }
225
+ }
226
+
227
+
228
+ /*******************************************************************/
229
+ /* */
230
+ /* free_contexts */
231
+ /* */
232
+ /*******************************************************************/
233
+
234
+ static void free_contexts( Contexts *c )
235
+
236
+ {
237
+ while (c) {
238
+ Contexts *tmp=c;
239
+ c = c->next;
240
+ delete tmp;
241
+ }
242
+ }
243
+
244
+
245
+ /*******************************************************************/
246
+ /* */
247
+ /* copy_values */
248
+ /* */
249
+ /*******************************************************************/
250
+
251
+ static Range *copy_values( const Range *r )
252
+
253
+ {
254
+ if (r == NULL)
255
+ return NULL;
256
+ return add_value( r->character, copy_values(r->next));
257
+ }
258
+
259
+
260
+ /*******************************************************************/
261
+ /* */
262
+ /* complement_range */
263
+ /* */
264
+ /*******************************************************************/
265
+
266
+ Range *complement_range( Range *r )
267
+
268
+ {
269
+ vector<Character> sym;
270
+ for( Range *p=r; p; p=p->next)
271
+ sym.push_back( p->character );
272
+ TheAlphabet.complement(sym);
273
+ if (sym.size() == 0)
274
+ error("Empty character range!");
275
+
276
+
277
+ Range *result=NULL;
278
+ for( size_t i=0; i<sym.size(); i++ ) {
279
+ Range *tmp = new Range;
280
+ tmp->character = sym[i];
281
+ tmp->next = result;
282
+ result = tmp;
283
+ }
284
+
285
+ return result;
286
+ }
287
+
288
+
289
+ /*******************************************************************/
290
+ /* */
291
+ /* make_transducer */
292
+ /* */
293
+ /*******************************************************************/
294
+
295
+ static Transducer *make_transducer( Range *r1, Range *r2 )
296
+
297
+ {
298
+ Transducer *t=new Transducer();
299
+ Node *node=t->new_node();
300
+ node->set_final(1);
301
+
302
+ if (r1 == NULL || r2 == NULL) {
303
+ if (!Alphabet_Defined)
304
+ error("The wildcard symbol '.' requires the definition of an alphabet");
305
+
306
+ // one of the ranges was '.'
307
+ for(Alphabet::const_iterator it=TheAlphabet.begin();
308
+ it!=TheAlphabet.end(); it++)
309
+ if ((r1 == NULL || in_range(it->lower_char(), r1)) &&
310
+ (r2 == NULL || in_range(it->upper_char(), r2)))
311
+ t->root_node()->add_arc( *it, node, t );
312
+ }
313
+ else {
314
+ for (;;) {
315
+ Label l(r1->character, r2->character);
316
+ // TheAlphabet.insert(l);
317
+ t->root_node()->add_arc( l, node, t );
318
+ if (!r1->next && !r2->next)
319
+ break;
320
+ if (r1->next)
321
+ r1 = r1->next;
322
+ if (r2->next)
323
+ r2 = r2->next;
324
+ }
325
+ }
326
+
327
+ return t;
328
+ }
329
+
330
+
331
+ /*******************************************************************/
332
+ /* */
333
+ /* empty_transducer */
334
+ /* */
335
+ /*******************************************************************/
336
+
337
+ static Transducer *empty_transducer()
338
+
339
+ {
340
+ Transducer *t=new Transducer();
341
+ t->root_node()->set_final(1);
342
+
343
+ return t;
344
+ }
345
+
346
+
347
+ /*******************************************************************/
348
+ /* */
349
+ /* one_label_transducer */
350
+ /* */
351
+ /*******************************************************************/
352
+
353
+ static Transducer *one_label_transducer( Label l )
354
+
355
+ {
356
+ Transducer *t = new Transducer();
357
+ Node *last = t->new_node();
358
+ t->root_node()->add_arc( l, last, t );
359
+ last->set_final(1);
360
+
361
+ return t;
362
+ }
363
+
364
+
365
+ /*******************************************************************/
366
+ /* */
367
+ /* new_transducer */
368
+ /* */
369
+ /*******************************************************************/
370
+
371
+ Transducer *new_transducer( Range *r1, Range *r2 )
372
+
373
+ {
374
+ Transducer *t=make_transducer( r1, r2);
375
+ if (r1 != r2)
376
+ free_values(r1);
377
+ free_values(r2);
378
+ return t;
379
+ }
380
+
381
+
382
+ /*******************************************************************/
383
+ /* */
384
+ /* read_words */
385
+ /* */
386
+ /*******************************************************************/
387
+
388
+ Transducer *read_words( char *filename )
389
+
390
+ {
391
+ if (Verbose)
392
+ fprintf(stderr,"\nreading words from %s...", filename);
393
+ ifstream is(filename);
394
+ if (!is.is_open()) {
395
+ static char message[1000];
396
+ sprintf(message,"Error: Cannot open file \"%s\"!",filename);
397
+ throw message;
398
+ }
399
+ Transducer *t = new Transducer(is, &TheAlphabet, Verbose);
400
+ is.close();
401
+ TheAlphabet.insert_symbols(t->alphabet);
402
+ if (Verbose)
403
+ fprintf(stderr,"finished\n");
404
+ return t;
405
+ }
406
+
407
+
408
+ /*******************************************************************/
409
+ /* */
410
+ /* read_transducer */
411
+ /* */
412
+ /*******************************************************************/
413
+
414
+ Transducer *read_transducer( char *filename )
415
+
416
+ {
417
+ if (Verbose)
418
+ fprintf(stderr,"\nreading transducer from %s...", filename);
419
+ FILE *file = fopen(filename,"rb");
420
+ if (file == NULL) {
421
+ static char message[1000];
422
+ sprintf(message,"Error: Cannot open file \"%s\"!",filename);
423
+ throw message;
424
+ }
425
+ Transducer t(file);
426
+ fclose(file);
427
+ if (t.alphabet.utf8 != TheAlphabet.utf8) {
428
+ static char message[1000];
429
+ sprintf(message,"Error: incompatible character encoding in file \"%s\"!",
430
+ filename);
431
+ throw message;
432
+ }
433
+ Transducer *nt = &t.copy(false, &TheAlphabet);
434
+ TheAlphabet.insert_symbols(nt->alphabet);
435
+ if (Verbose)
436
+ fprintf(stderr,"finished\n");
437
+ return nt;
438
+ }
439
+
440
+
441
+ /*******************************************************************/
442
+ /* */
443
+ /* def_alphabet */
444
+ /* */
445
+ /*******************************************************************/
446
+
447
+ void def_alphabet( Transducer *t )
448
+
449
+ {
450
+ t = explode(t);
451
+ t = minimise(t);
452
+ t->alphabet.clear_char_pairs();
453
+ t->complete_alphabet();
454
+ TheAlphabet.clear_char_pairs();
455
+ TheAlphabet.copy(t->alphabet);
456
+ Alphabet_Defined = 1;
457
+ delete t;
458
+ }
459
+
460
+
461
+ /*******************************************************************/
462
+ /* */
463
+ /* def_svar definition of a value range variable */
464
+ /* */
465
+ /*******************************************************************/
466
+
467
+ bool def_svar( char *name, Range *r )
468
+
469
+ {
470
+ // delete the old value of the variable
471
+ SVarMap::iterator it=SVM.find(name);
472
+ if (it != SVM.end()) {
473
+ char *n=it->first;
474
+ Range *v=it->second;
475
+ SVM.erase(it);
476
+ delete v;
477
+ free(n);
478
+ }
479
+ SVM[name] = r;
480
+ return r == NULL;
481
+ }
482
+
483
+
484
+ /*******************************************************************/
485
+ /* */
486
+ /* svar_value */
487
+ /* */
488
+ /*******************************************************************/
489
+
490
+ Range *svar_value( char *name )
491
+
492
+ {
493
+ SVarMap::iterator it=SVM.find(name);
494
+ if (it == SVM.end())
495
+ error2("undefined variable", name);
496
+ free(name);
497
+ return copy_values(it->second);
498
+ }
499
+
500
+
501
+ /*******************************************************************/
502
+ /* */
503
+ /* rsvar_value */
504
+ /* */
505
+ /*******************************************************************/
506
+
507
+ Range *rsvar_value( char *name )
508
+
509
+ {
510
+ if (RSS.find(name) == RSS.end())
511
+ RSS.insert(fst_strdup(name));
512
+ return add_value(symbol_code(name), NULL);
513
+ }
514
+
515
+
516
+ /*******************************************************************/
517
+ /* */
518
+ /* def_var definition of an transducer variable */
519
+ /* */
520
+ /*******************************************************************/
521
+
522
+ bool def_var( char *name, Transducer *t )
523
+
524
+ {
525
+ // delete the old value of the variable
526
+ VarMap::iterator it=VM.find(name);
527
+ if (it != VM.end()) {
528
+ char *n=it->first;
529
+ Transducer *v=it->second;
530
+ VM.erase(it);
531
+ delete v;
532
+ free(n);
533
+ }
534
+
535
+ t = explode(t);
536
+ t = minimise(t);
537
+
538
+ VM[name] = t;
539
+ return t->is_empty();
540
+ }
541
+
542
+
543
+ /*******************************************************************/
544
+ /* */
545
+ /* def_rvar definition of an agreement variable for automata */
546
+ /* */
547
+ /*******************************************************************/
548
+
549
+ bool def_rvar( char *name, Transducer *t )
550
+
551
+ {
552
+ if (t->is_cyclic())
553
+ error2("cyclic transducer assigned to", name);
554
+ return def_var( name, t );
555
+ }
556
+
557
+
558
+ /*******************************************************************/
559
+ /* */
560
+ /* var_value */
561
+ /* */
562
+ /*******************************************************************/
563
+
564
+ Transducer *var_value( char *name )
565
+
566
+ {
567
+ VarMap::iterator it=VM.find(name);
568
+ if (it == VM.end())
569
+ error2("undefined variable", name);
570
+ free(name);
571
+ return &(it->second->copy());
572
+ }
573
+
574
+
575
+ /*******************************************************************/
576
+ /* */
577
+ /* rvar_value */
578
+ /* */
579
+ /*******************************************************************/
580
+
581
+ Transducer *rvar_value( char *name )
582
+
583
+ {
584
+ if (RS.find(name) == RS.end())
585
+ RS.insert(fst_strdup(name));
586
+ Range *r=add_value(symbol_code(name), NULL);
587
+ return new_transducer(r,r);
588
+ }
589
+
590
+
591
+ /*******************************************************************/
592
+ /* */
593
+ /* explode */
594
+ /* */
595
+ /*******************************************************************/
596
+
597
+ Transducer *explode( Transducer *t )
598
+
599
+ {
600
+ if (RS.size() == 0 && RSS.size() == 0)
601
+ return t;
602
+
603
+ t = minimise(t);
604
+
605
+ vector<char*> name;
606
+ for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
607
+ name.push_back(*it);
608
+ RS.clear();
609
+
610
+ // replace all agreement variables
611
+ for( size_t i=0; i<name.size(); i++ ) {
612
+ Transducer *nt = NULL;
613
+ Label l(TheAlphabet.symbol2code(name[i]));
614
+ Transducer *vt=var_value(name[i]);
615
+
616
+ // enumerate all paths of the transducer
617
+ vector<Transducer*> it;
618
+ vt->enumerate_paths(it);
619
+ delete vt;
620
+
621
+ // insert each path
622
+ for( size_t i=0; i<it.size(); i++ ) {
623
+
624
+ // insertion
625
+ Transducer *t1 = &t->splice(l, it[i]);
626
+ delete it[i];
627
+
628
+ if (nt == NULL)
629
+ nt = t1;
630
+ else
631
+ nt = disjunction(nt, t1);
632
+ }
633
+ delete t;
634
+ t = nt;
635
+ }
636
+
637
+ name.clear();
638
+ for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
639
+ name.push_back(*it);
640
+ RSS.clear();
641
+
642
+ // replace all agreement variables
643
+ for( size_t i=0; i<name.size(); i++ ) {
644
+ Transducer *nt = NULL;
645
+ Character c=TheAlphabet.symbol2code(name[i]);
646
+ Range *r=svar_value(name[i]);
647
+
648
+ // insert each character
649
+ while (r != NULL) {
650
+
651
+ // insertion
652
+ Transducer *t1 = &t->replace_char(c, r->character);
653
+
654
+ if (nt == NULL)
655
+ nt = t1;
656
+ else
657
+ nt = disjunction(nt, t1);
658
+
659
+ Range *next = r->next;
660
+ delete r;
661
+ r = next;
662
+ }
663
+ delete t;
664
+ t = nt;
665
+ }
666
+
667
+ return t;
668
+ }
669
+
670
+
671
+ /*******************************************************************/
672
+ /* */
673
+ /* catenate */
674
+ /* */
675
+ /*******************************************************************/
676
+
677
+ Transducer *catenate( Transducer *t1, Transducer *t2 )
678
+
679
+ {
680
+ Transducer *t = &(*t1 + *t2);
681
+ delete t1;
682
+ delete t2;
683
+ return t;
684
+ }
685
+
686
+
687
+ /*******************************************************************/
688
+ /* */
689
+ /* add_range */
690
+ /* */
691
+ /*******************************************************************/
692
+
693
+ Ranges *add_range( Range *r, Ranges *l )
694
+
695
+ {
696
+ Ranges *result = new Ranges;
697
+ result->range = r;
698
+ result->next = l;
699
+ return result;
700
+ }
701
+
702
+
703
+ /*******************************************************************/
704
+ /* */
705
+ /* make_mapping */
706
+ /* */
707
+ /*******************************************************************/
708
+
709
+ Transducer *make_mapping( Ranges *list1, Ranges *list2 )
710
+
711
+ {
712
+ Ranges *l1=list1;
713
+ Ranges *l2=list2;
714
+ Transducer *t=new Transducer();
715
+
716
+ Node *node=t->root_node();
717
+ while (l1 && l2) {
718
+ Node *nn=t->new_node();
719
+ for( Range *r1=l1->range; r1; r1=r1->next )
720
+ for( Range *r2=l2->range; r2; r2=r2->next )
721
+ node->add_arc( Label(r1->character, r2->character), nn, t );
722
+ node = nn;
723
+ l1 = l1->next;
724
+ l2 = l2->next;
725
+ }
726
+ while (l1) {
727
+ Node *nn=t->new_node();
728
+ for( Range *r1=l1->range; r1; r1=r1->next )
729
+ node->add_arc( Label(r1->character, Label::epsilon), nn, t );
730
+ node = nn;
731
+ l1 = l1->next;
732
+ }
733
+ while (l2) {
734
+ Node *nn=t->new_node();
735
+ for( Range *r2=l2->range; r2; r2=r2->next )
736
+ node->add_arc( Label(Label::epsilon, r2->character), nn, t );
737
+ node = nn;
738
+ l2 = l2->next;
739
+ }
740
+ node->set_final(1);
741
+
742
+ free_values(list1);
743
+ free_values(list2);
744
+ return t;
745
+ }
746
+
747
+
748
+ /*******************************************************************/
749
+ /* */
750
+ /* disjunction */
751
+ /* */
752
+ /*******************************************************************/
753
+
754
+ Transducer *disjunction( Transducer *t1, Transducer *t2 )
755
+
756
+ {
757
+ Transducer *t = &(*t1 | *t2);
758
+ delete t1;
759
+ delete t2;
760
+ return t;
761
+ }
762
+
763
+
764
+ /*******************************************************************/
765
+ /* */
766
+ /* conjunction */
767
+ /* */
768
+ /*******************************************************************/
769
+
770
+ Transducer *conjunction( Transducer *t1, Transducer *t2 )
771
+
772
+ {
773
+ if (RS.size() > 0 || RSS.size() > 0)
774
+ cerr << "\nWarning: agreement operation inside of conjunction!\n";
775
+ Transducer *t = &(*t1 & *t2);
776
+ delete t1;
777
+ delete t2;
778
+ return t;
779
+ }
780
+
781
+
782
+ /*******************************************************************/
783
+ /* */
784
+ /* subtraction */
785
+ /* */
786
+ /*******************************************************************/
787
+
788
+ Transducer *subtraction( Transducer *t1, Transducer *t2 )
789
+
790
+ {
791
+ if (RS.size() > 0 || RSS.size() > 0)
792
+ cerr << "\nWarning: agreement operation inside of conjunction!\n";
793
+ Transducer *t = &(*t1 / *t2);
794
+ delete t1;
795
+ delete t2;
796
+ return t;
797
+ }
798
+
799
+
800
+ /*******************************************************************/
801
+ /* */
802
+ /* composition */
803
+ /* */
804
+ /*******************************************************************/
805
+
806
+ Transducer *composition( Transducer *t1, Transducer *t2 )
807
+
808
+ {
809
+ if (RS.size() > 0 || RSS.size() > 0)
810
+ cerr << "\nWarning: agreement operation inside of composition!\n";
811
+ Transducer *t = &(*t1 || *t2);
812
+ delete t1;
813
+ delete t2;
814
+ return t;
815
+ }
816
+
817
+ /*******************************************************************/
818
+ /* */
819
+ /* freely_insert */
820
+ /* */
821
+ /*******************************************************************/
822
+
823
+ Transducer *freely_insert( Transducer *t, Character lc, Character uc )
824
+
825
+ {
826
+ return &t->freely_insert(Label(lc,uc));
827
+ }
828
+
829
+
830
+ /*******************************************************************/
831
+ /* */
832
+ /* negation */
833
+ /* */
834
+ /*******************************************************************/
835
+
836
+ Transducer *negation( Transducer *t )
837
+
838
+ {
839
+ if (RS.size() > 0 || RSS.size() > 0)
840
+ cerr << "\nWarning: agreement operation inside of negation!\n";
841
+ if (!Alphabet_Defined)
842
+ error("Negation requires the definition of an alphabet");
843
+ t->alphabet.clear_char_pairs();
844
+ t->alphabet.copy(TheAlphabet);
845
+ Transducer *nt = &(!*t);
846
+ delete t;
847
+ return nt;
848
+ }
849
+
850
+
851
+ /*******************************************************************/
852
+ /* */
853
+ /* upper_level */
854
+ /* */
855
+ /*******************************************************************/
856
+
857
+ Transducer *upper_level( Transducer *t )
858
+
859
+ {
860
+ Transducer *nt = &t->upper_level();
861
+ delete t;
862
+ return nt;
863
+ }
864
+
865
+
866
+ /*******************************************************************/
867
+ /* */
868
+ /* lower_level */
869
+ /* */
870
+ /*******************************************************************/
871
+
872
+ Transducer *lower_level( Transducer *t )
873
+
874
+ {
875
+ Transducer *nt = &t->lower_level();
876
+ delete t;
877
+ return nt;
878
+ }
879
+
880
+
881
+ /*******************************************************************/
882
+ /* */
883
+ /* minimise */
884
+ /* */
885
+ /*******************************************************************/
886
+
887
+ Transducer *minimise( Transducer *t )
888
+
889
+ {
890
+ t->alphabet.copy(TheAlphabet);
891
+ Transducer *nt = &t->minimise( Verbose );
892
+ delete t;
893
+ return nt;
894
+ }
895
+
896
+
897
+ /*******************************************************************/
898
+ /* */
899
+ /* switch_levels */
900
+ /* */
901
+ /*******************************************************************/
902
+
903
+ Transducer *switch_levels( Transducer *t )
904
+
905
+ {
906
+ Transducer *nt = &t->switch_levels();
907
+ delete t;
908
+ return nt;
909
+ }
910
+
911
+
912
+ /*******************************************************************/
913
+ /* */
914
+ /* repetition */
915
+ /* */
916
+ /*******************************************************************/
917
+
918
+ Transducer *repetition( Transducer *t )
919
+
920
+ {
921
+ Transducer *nt = &(t->kleene_star());
922
+ delete t;
923
+ return nt;
924
+ }
925
+
926
+
927
+ /*******************************************************************/
928
+ /* */
929
+ /* repetition2 */
930
+ /* */
931
+ /*******************************************************************/
932
+
933
+ Transducer *repetition2( Transducer *t )
934
+
935
+ {
936
+ Transducer *t1 = &(t->kleene_star());
937
+ Transducer *nt = &(*t + *t1);
938
+ delete t;
939
+ delete t1;
940
+ return nt;
941
+ }
942
+
943
+
944
+ /*******************************************************************/
945
+ /* */
946
+ /* optional */
947
+ /* */
948
+ /*******************************************************************/
949
+
950
+ Transducer *optional( Transducer *t )
951
+
952
+ {
953
+ Transducer *nt = &(t->copy());
954
+ nt->root_node()->set_final(1);
955
+ delete t;
956
+ return nt;
957
+ }
958
+
959
+
960
+ /*******************************************************************/
961
+ /* */
962
+ /* add_pi_transitions */
963
+ /* */
964
+ /*******************************************************************/
965
+
966
+ static void add_pi_transitions( Transducer *t, Node *node, Alphabet &alph )
967
+
968
+ {
969
+ for( Alphabet::const_iterator it=alph.begin(); it!=alph.end(); it++)
970
+ node->add_arc( *it, node, t );
971
+ }
972
+
973
+
974
+ /*******************************************************************/
975
+ /* */
976
+ /* pi_machine */
977
+ /* */
978
+ /*******************************************************************/
979
+
980
+ static Transducer *pi_machine( Alphabet &alph )
981
+
982
+ {
983
+ Transducer *t=new Transducer();
984
+ t->root_node()->set_final(1);
985
+ add_pi_transitions( t, t->root_node(), alph );
986
+ return t;
987
+ }
988
+
989
+
990
+ /*******************************************************************/
991
+ /* */
992
+ /* cp */
993
+ /* */
994
+ /*******************************************************************/
995
+
996
+ static Transducer *cp( Range *lower_range, Range *upper_range )
997
+
998
+ {
999
+ return make_transducer(lower_range, upper_range);
1000
+ }
1001
+
1002
+
1003
+ /*******************************************************************/
1004
+ /* */
1005
+ /* anti_cp */
1006
+ /* */
1007
+ /*******************************************************************/
1008
+
1009
+ static Transducer *anti_cp( Range *lower_range, Range *upper_range )
1010
+
1011
+ {
1012
+ Transducer *cpt = cp(lower_range, upper_range);
1013
+ Transducer *t=new Transducer();
1014
+ Node *node=t->new_node();
1015
+
1016
+ node->set_final(1);
1017
+ for(Alphabet::const_iterator it=TheAlphabet.begin();
1018
+ it!=TheAlphabet.end(); it++){
1019
+ Label l=*it;
1020
+ if (in_range(l.lower_char(), lower_range) &&
1021
+ !cpt->root_node()->target_node(l))
1022
+ t->root_node()->add_arc( l, node, t );
1023
+ }
1024
+ if (in_range(Label::epsilon, lower_range) &&
1025
+ !cpt->root_node()->target_node(Label()))
1026
+ t->root_node()->add_arc( Label(), node, t );
1027
+
1028
+ delete cpt;
1029
+ return t;
1030
+ }
1031
+
1032
+
1033
+ /*******************************************************************/
1034
+ /* */
1035
+ /* twol_right_rule */
1036
+ /* */
1037
+ /*******************************************************************/
1038
+
1039
+ static Transducer *twol_right_rule( Transducer *lc, Range *lower_range,
1040
+ Range *upper_range, Transducer *rc )
1041
+
1042
+ {
1043
+ // Build the rule transducer
1044
+ Transducer *cpt = cp(lower_range, upper_range);
1045
+ Transducer *pi=pi_machine(TheAlphabet);
1046
+
1047
+ // First unwanted language
1048
+
1049
+ lc->alphabet.copy(TheAlphabet);
1050
+ Transducer *notlc = &(!*lc);
1051
+ Transducer *tmp = &(*notlc + *cpt);
1052
+ delete notlc;
1053
+ Transducer *t1 = &(*tmp + *pi);
1054
+ delete tmp;
1055
+
1056
+ // Second unwanted language
1057
+ rc->alphabet.copy(TheAlphabet);
1058
+ Transducer *notrc = &(!*rc);
1059
+ tmp = &(*cpt + *notrc);
1060
+ delete cpt;
1061
+ delete notrc;
1062
+ Transducer *t2 = &(*pi + *tmp);
1063
+ delete pi;
1064
+ delete tmp;
1065
+
1066
+ tmp = &(*t1|*t2);
1067
+ delete t1;
1068
+ delete t2;
1069
+
1070
+ tmp->alphabet.copy(TheAlphabet);
1071
+ t1 = &(!*tmp);
1072
+ delete tmp;
1073
+
1074
+ return t1;
1075
+ }
1076
+
1077
+
1078
+ /*******************************************************************/
1079
+ /* */
1080
+ /* twol_left_rule */
1081
+ /* */
1082
+ /*******************************************************************/
1083
+
1084
+ static Transducer *twol_left_rule( Transducer *lc, Range *lower_range,
1085
+ Range *upper_range, Transducer *rc )
1086
+
1087
+ {
1088
+ // check for problematic insertion operations like "$L <> <= a $R"
1089
+ // where either $L or $R includes the empty string
1090
+ if (in_range(Label::epsilon, lower_range)) {
1091
+ if (lc->generates_empty_string())
1092
+ error("in two level rule: insertion operation with deletable left context!");
1093
+ if (rc->generates_empty_string())
1094
+ error("in two level rule: insertion operation with deletable right context!");
1095
+ cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
1096
+ }
1097
+
1098
+ // Build the rule transducer
1099
+ Transducer *t1 = anti_cp(lower_range, upper_range);
1100
+
1101
+ // Add the left context;
1102
+ Transducer *t2 = &(*lc + *t1);
1103
+ delete t1;
1104
+
1105
+ // Add the right context;
1106
+ t1 = &(*t2 + *rc);
1107
+ delete t2;
1108
+
1109
+ // Form the complement
1110
+ t1->alphabet.copy(TheAlphabet);
1111
+ t2 = &(!*t1);
1112
+ delete t1;
1113
+
1114
+ return t2;
1115
+ }
1116
+
1117
+
1118
+ /*******************************************************************/
1119
+ /* */
1120
+ /* make_rule */
1121
+ /* */
1122
+ /*******************************************************************/
1123
+
1124
+ Transducer *make_rule( Transducer *lc, Range *lower_range, Twol_Type type,
1125
+ Range *upper_range, Transducer *rc )
1126
+
1127
+ {
1128
+ if (RS.size() > 0 || RSS.size() > 0)
1129
+ cerr << "\nWarning: agreement operation inside of replacement rule!\n";
1130
+
1131
+ if (!Alphabet_Defined)
1132
+ error("Two level rules require the definition of an alphabet");
1133
+
1134
+ // expand the left and the right contexts to their full length
1135
+ Transducer *pi=pi_machine(TheAlphabet);
1136
+
1137
+ if (lc == NULL)
1138
+ lc = pi_machine(TheAlphabet);
1139
+ else {
1140
+ Transducer *tmp = &(*pi + *lc);
1141
+ delete lc;
1142
+ lc = tmp;
1143
+ }
1144
+ if (rc == NULL)
1145
+ rc = pi_machine(TheAlphabet);
1146
+ else {
1147
+ Transducer *tmp = &(*rc + *pi);
1148
+ delete rc;
1149
+ rc = tmp;
1150
+ }
1151
+ delete pi;
1152
+
1153
+ Transducer *result = NULL;
1154
+
1155
+ switch (type) {
1156
+ case twol_left:
1157
+ result = twol_left_rule(lc, lower_range, upper_range, rc);
1158
+ break;
1159
+ case twol_right:
1160
+ result = twol_right_rule(lc, lower_range, upper_range, rc);
1161
+ break;
1162
+ case twol_both:
1163
+ {
1164
+ Transducer *t1 = twol_left_rule(lc, lower_range, upper_range, rc);
1165
+ Transducer *t2 = twol_right_rule(lc, lower_range, upper_range, rc);
1166
+ result = &(*t1 & *t2);
1167
+ delete t1;
1168
+ delete t2;
1169
+ }
1170
+ }
1171
+ delete lc;
1172
+ delete rc;
1173
+ if (lower_range != upper_range)
1174
+ free_values(lower_range);
1175
+ free_values(upper_range);
1176
+
1177
+ return minimise(result);
1178
+ }
1179
+
1180
+
1181
+ /*******************************************************************/
1182
+ /* */
1183
+ /* make_context */
1184
+ /* */
1185
+ /*******************************************************************/
1186
+
1187
+ Contexts *make_context( Transducer *l, Transducer *r )
1188
+
1189
+ {
1190
+ if (l == NULL)
1191
+ l = empty_transducer();
1192
+ if (r == NULL)
1193
+ r = empty_transducer();
1194
+
1195
+ Contexts *c=new Contexts();
1196
+ c->left = l;
1197
+ c->right = r;
1198
+ c->next = NULL;
1199
+
1200
+ return c;
1201
+ }
1202
+
1203
+
1204
+ /*******************************************************************/
1205
+ /* */
1206
+ /* add_context */
1207
+ /* */
1208
+ /*******************************************************************/
1209
+
1210
+ Contexts *add_context( Contexts *nc, Contexts *c )
1211
+
1212
+ {
1213
+ nc->next = c;
1214
+ return nc;
1215
+ }
1216
+
1217
+
1218
+ /*******************************************************************/
1219
+ /* */
1220
+ /* restriction_transducer */
1221
+ /* */
1222
+ /*******************************************************************/
1223
+
1224
+ static Transducer *restriction_transducer( Transducer *l1, Transducer *l2,
1225
+ Character marker )
1226
+
1227
+ {
1228
+ l1->alphabet.copy(TheAlphabet);
1229
+ Transducer *t1 = &(*l1 / *l2);
1230
+
1231
+ Transducer *t2 = &t1->replace_char(marker, Label::epsilon);
1232
+ delete t1;
1233
+
1234
+ t2->alphabet.copy(TheAlphabet);
1235
+ t1 = &(!*t2);
1236
+ delete t2;
1237
+
1238
+ return t1;
1239
+ }
1240
+
1241
+
1242
+ /*******************************************************************/
1243
+ /* */
1244
+ /* marker_transducer */
1245
+ /* */
1246
+ /*******************************************************************/
1247
+
1248
+ static Transducer *marker_transducer( Transducer *t, Contexts *c,
1249
+ Character &marker )
1250
+ {
1251
+ marker = TheAlphabet.new_marker();
1252
+ Transducer *result = one_label_transducer( Label(marker) );
1253
+
1254
+ // build the alphabet with a new marker
1255
+ result->alphabet.insert_symbols(t->alphabet);
1256
+ while (c) {
1257
+ result->alphabet.insert_symbols(c->left->alphabet);
1258
+ result->alphabet.insert_symbols(c->right->alphabet);
1259
+ c = c->next;
1260
+ }
1261
+
1262
+ return result;
1263
+ }
1264
+
1265
+
1266
+ /*******************************************************************/
1267
+ /* */
1268
+ /* center_transducer */
1269
+ /* */
1270
+ /*******************************************************************/
1271
+
1272
+ static Transducer *center_transducer( Transducer *t, Transducer *pi,
1273
+ Transducer *mt )
1274
+ {
1275
+ // create the concatenation pi + mt + *t + mt + pi
1276
+ Transducer *t1=&(*pi + *mt);
1277
+ Transducer *t2=&(*t1 + *t);
1278
+ delete t1;
1279
+ t1 = &(*t2 + *mt);
1280
+ delete t2;
1281
+ t2 = &(*t1 + *pi);
1282
+ delete t1;
1283
+ return t2;
1284
+ }
1285
+
1286
+
1287
+ /*******************************************************************/
1288
+ /* */
1289
+ /* context_transducer */
1290
+ /* */
1291
+ /*******************************************************************/
1292
+
1293
+ static Transducer *context_transducer( Transducer *t, Transducer *pi,
1294
+ Transducer *mt, Contexts *c )
1295
+ {
1296
+ // pi + left[i] + mt + pi + mt + right[i] + pi
1297
+
1298
+ Transducer *t1 = &(*mt + *t);
1299
+ Transducer *tmp = &(*t1 + *mt);
1300
+ delete t1;
1301
+ Transducer *result=NULL;
1302
+
1303
+ while (c) {
1304
+ t1 = &(*pi + *c->left);
1305
+ Transducer *t2 = &(*t1 + *tmp);
1306
+ delete t1;
1307
+ t1 = &(*t2 + *c->right);
1308
+ delete t2;
1309
+ t2 = &(*t1 + *pi);
1310
+ delete t1;
1311
+
1312
+ if (result) {
1313
+ t1 = &(*result | *t2);
1314
+ delete t2;
1315
+ result = t1;
1316
+ }
1317
+ else
1318
+ result = t2;
1319
+
1320
+ c = c->next;
1321
+ }
1322
+ delete tmp;
1323
+
1324
+ return result;
1325
+ }
1326
+
1327
+
1328
+
1329
+ /*******************************************************************/
1330
+ /* */
1331
+ /* result_transducer */
1332
+ /* */
1333
+ /*******************************************************************/
1334
+
1335
+ static Transducer *result_transducer( Transducer *l1, Transducer *l2,
1336
+ Twol_Type type, Character marker )
1337
+ {
1338
+ Transducer *result=NULL;
1339
+ if (type == twol_right)
1340
+ result = restriction_transducer( l1, l2, marker );
1341
+ else if (type == twol_left)
1342
+ result = restriction_transducer( l2, l1, marker );
1343
+ else if (type == twol_both) {
1344
+ Transducer *t1 = restriction_transducer( l1, l2, marker );
1345
+ Transducer *t2 = restriction_transducer( l2, l1, marker );
1346
+ result = &(*t1 & *t2);
1347
+ delete t1;
1348
+ delete t2;
1349
+ }
1350
+
1351
+ return result;
1352
+ }
1353
+
1354
+
1355
+ /*******************************************************************/
1356
+ /* */
1357
+ /* restriction */
1358
+ /* */
1359
+ /*******************************************************************/
1360
+
1361
+ Transducer *restriction( Transducer *t, Twol_Type type, Contexts *c,
1362
+ int direction )
1363
+ {
1364
+ Character marker;
1365
+ Transducer *mt=marker_transducer( t, c, marker );
1366
+ Transducer *pi=pi_machine(TheAlphabet);
1367
+ Transducer *l1=center_transducer( t, pi, mt );
1368
+
1369
+ Transducer *tmp;
1370
+ if (direction == 0)
1371
+ tmp = pi;
1372
+ else if (direction == 1) {
1373
+ // compute _t || .*
1374
+ Transducer *t1 = &t->lower_level();
1375
+ tmp = &(*t1 || *pi);
1376
+ delete t1;
1377
+ }
1378
+ else {
1379
+ // compute ^t || .*
1380
+ Transducer *t1 = &t->upper_level();
1381
+ tmp = &(*pi || *t1);
1382
+ delete t1;
1383
+ }
1384
+ delete t;
1385
+
1386
+ Transducer *l2=context_transducer( tmp, pi, mt, c );
1387
+ if (tmp != pi)
1388
+ delete tmp;
1389
+ delete pi;
1390
+ delete mt;
1391
+
1392
+ Transducer *result=result_transducer( l1, l2, type, marker );
1393
+ delete l1;
1394
+ delete l2;
1395
+
1396
+ free_contexts( c );
1397
+
1398
+ return result;
1399
+ }
1400
+
1401
+
1402
+ /*******************************************************************/
1403
+ /* */
1404
+ /* constrain_boundary_transducer */
1405
+ /* */
1406
+ /*******************************************************************/
1407
+
1408
+ Transducer *constrain_boundary_transducer( Character leftm, Character rightm )
1409
+
1410
+ {
1411
+ // create the transducer (.|<L>|<R>)*
1412
+
1413
+ Transducer *tmp=pi_machine(TheAlphabet);
1414
+
1415
+ // create the transducer (.|<L>|<R>)* <L><R> (.|<L>|<R>)*
1416
+ Node *root = tmp->root_node();
1417
+ Node *node = tmp->new_node();
1418
+ Node *last = tmp->new_node();
1419
+
1420
+ root->set_final(0);
1421
+ last->set_final(1);
1422
+
1423
+ root->add_arc( Label(leftm), node, tmp);
1424
+ node->add_arc( Label(rightm), last, tmp);
1425
+
1426
+ add_pi_transitions( tmp, last, TheAlphabet );
1427
+
1428
+ // create the transducer !((.|<L>|<R>)* <L><R> (.|<L>|<R>)*)
1429
+ tmp->alphabet.copy(TheAlphabet);
1430
+ Transducer *result = &(!*tmp);
1431
+ delete tmp;
1432
+
1433
+ return result;
1434
+ }
1435
+
1436
+
1437
+ /*******************************************************************/
1438
+ /* */
1439
+ /* extended_context */
1440
+ /* */
1441
+ /*******************************************************************/
1442
+
1443
+ Transducer *extended_context( Transducer *t, Character m1, Character m2 )
1444
+
1445
+ {
1446
+ if (t == NULL) // empty context
1447
+ return pi_machine(TheAlphabet);
1448
+
1449
+ // Extended context transducer
1450
+
1451
+ // <R> >> (<L> >> $T$)
1452
+ Transducer *tmp=&t->freely_insert( Label(m1) );
1453
+ delete t;
1454
+ t = &tmp->freely_insert( Label(m2) );
1455
+ delete tmp;
1456
+
1457
+ // .* (<R> >> (<L> >> $T$))
1458
+ add_pi_transitions( t, t->root_node(), TheAlphabet );
1459
+
1460
+ // !(.*<L>)
1461
+ tmp = one_label_transducer(Label(m1));
1462
+ add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1463
+ tmp->alphabet.copy(TheAlphabet);
1464
+ Transducer *t2 = &(!*tmp);
1465
+ delete tmp;
1466
+
1467
+ // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1468
+ tmp = &(*t || *t2);
1469
+ delete t;
1470
+ delete t2;
1471
+
1472
+ return tmp;
1473
+ }
1474
+
1475
+
1476
+ /*******************************************************************/
1477
+ /* */
1478
+ /* replace_context */
1479
+ /* */
1480
+ /*******************************************************************/
1481
+
1482
+ Transducer *replace_context( Transducer *t, Character m1, Character m2 )
1483
+
1484
+ {
1485
+ // $C$ = .* (<L> >> (<R> >> $T$))
1486
+ Transducer *ct = extended_context(t, m1, m2);
1487
+
1488
+ // <R>*<L> .*
1489
+ Transducer *mt = one_label_transducer(Label(m1));
1490
+ mt->root_node()->add_arc(Label(m2), mt->root_node(), mt );
1491
+ add_pi_transitions(mt, mt->root_node()->target_node(Label(m1)),TheAlphabet);
1492
+
1493
+ ct->alphabet.copy(TheAlphabet);
1494
+ Transducer *no_ct = &!*ct;
1495
+
1496
+ mt->alphabet.copy(TheAlphabet);
1497
+ Transducer *no_mt = &!*mt;
1498
+
1499
+ Transducer *t1 = &(*no_ct + *mt);
1500
+ delete no_ct;
1501
+ delete mt;
1502
+
1503
+ Transducer *t2 = &(*ct + *no_mt);
1504
+ delete ct;
1505
+ delete no_mt;
1506
+
1507
+ Transducer *tmp = &(*t1 | *t2);
1508
+ delete t1;
1509
+ delete t2;
1510
+
1511
+ tmp->alphabet.copy(TheAlphabet);
1512
+ t1 = &!*tmp;
1513
+ delete tmp;
1514
+
1515
+ return t1;
1516
+ }
1517
+
1518
+
1519
+ /*******************************************************************/
1520
+ /* */
1521
+ /* make_optional */
1522
+ /* */
1523
+ /*******************************************************************/
1524
+
1525
+ static Transducer *make_optional( Transducer *t )
1526
+
1527
+ {
1528
+ Transducer *t1 = pi_machine(TheAlphabet);
1529
+ Transducer *t2 = &(*t | *t1);
1530
+ delete t;
1531
+ delete t1;
1532
+ return t2;
1533
+ }
1534
+
1535
+
1536
+ /*******************************************************************/
1537
+ /* */
1538
+ /* replace */
1539
+ /* */
1540
+ /*******************************************************************/
1541
+
1542
+ Transducer *replace( Transducer *ct, Repl_Type type, bool optional )
1543
+
1544
+ {
1545
+ // compute the no-center transducer
1546
+ Transducer *tmp;
1547
+
1548
+ if (type == repl_up)
1549
+ // _ct
1550
+ tmp = &ct->lower_level();
1551
+ else if (type == repl_down)
1552
+ // ^ct
1553
+ tmp = &ct->upper_level();
1554
+ else
1555
+ error("Invalid type of replace operator");
1556
+
1557
+ // .* _ct
1558
+ add_pi_transitions( tmp, tmp->root_node(), TheAlphabet );
1559
+
1560
+ // .* _ct .*
1561
+ Transducer *t2 = pi_machine(TheAlphabet);
1562
+ Transducer *t3 = &(*tmp + *t2);
1563
+ delete tmp;
1564
+ delete t2;
1565
+
1566
+ // no_ct = !(.* _ct .*)
1567
+ t3->alphabet.copy(TheAlphabet);
1568
+ Transducer *no_ct = &(!*t3);
1569
+ delete t3;
1570
+
1571
+ // compute the unconditional replacement transducer
1572
+
1573
+ // no-ct ct
1574
+ tmp = &(*no_ct + *ct);
1575
+ delete ct;
1576
+
1577
+ // (no-ct ct)*
1578
+ t2 = &(tmp->kleene_star());
1579
+ delete tmp;
1580
+
1581
+ // (no-ct ct)* no-ct
1582
+ tmp = &(*t2 + *no_ct);
1583
+ delete t2;
1584
+ delete no_ct;
1585
+
1586
+ if (optional)
1587
+ tmp = make_optional(tmp);
1588
+
1589
+ return tmp;
1590
+ }
1591
+
1592
+
1593
+ /*******************************************************************/
1594
+ /* */
1595
+ /* replace_transducer */
1596
+ /* */
1597
+ /*******************************************************************/
1598
+
1599
+ Transducer *replace_transducer( Transducer *ct, Character lm, Character rm,
1600
+ Repl_Type type )
1601
+ {
1602
+ // insert boundary markers into the center transducer
1603
+
1604
+ // <L> >> (<R> >> $Center$)
1605
+ Transducer *tmp = &ct->freely_insert(Label(lm));
1606
+ delete ct;
1607
+ ct = &tmp->freely_insert(Label(rm));
1608
+ delete tmp;
1609
+
1610
+ // add surrounding boundary markers to the center transducer
1611
+
1612
+ // <L> (<L> >> (<R> >> $Center$))
1613
+ Transducer *t2 = one_label_transducer( Label(lm) );
1614
+ tmp = &(*t2 + *ct);
1615
+ delete t2;
1616
+ delete ct;
1617
+
1618
+ // $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
1619
+ t2 = one_label_transducer( Label(rm) );
1620
+ ct = &(*tmp + *t2);
1621
+ delete tmp;
1622
+ delete t2;
1623
+
1624
+ return replace(ct, type, false);
1625
+ }
1626
+
1627
+
1628
+ /*******************************************************************/
1629
+ /* */
1630
+ /* replace_in_context */
1631
+ /* */
1632
+ /*******************************************************************/
1633
+
1634
+ Transducer *replace_in_context( Transducer *t, Repl_Type type, Contexts *c,
1635
+ bool optional )
1636
+ {
1637
+ if (!Alphabet_Defined)
1638
+ error("The replace operators require the definition of an alphabet");
1639
+
1640
+ if (!c->left->is_automaton() || !c->right->is_automaton())
1641
+ error("The replace operators require automata as context expressions!");
1642
+
1643
+ // create the marker symbols
1644
+ Character leftm = TheAlphabet.new_marker();
1645
+ Character rightm = TheAlphabet.new_marker();
1646
+
1647
+ /////////////////////////////////////////////////////////////
1648
+ // Create the insert boundary transducer (.|<>:<L>|<>:<R>)*
1649
+ /////////////////////////////////////////////////////////////
1650
+
1651
+ Transducer *ibt=pi_machine(TheAlphabet);
1652
+ Node *root=ibt->root_node();
1653
+ root->add_arc( Label(Label::epsilon, leftm), root, ibt);
1654
+ root->add_arc( Label(Label::epsilon, rightm),root, ibt);
1655
+
1656
+ /////////////////////////////////////////////////////////////
1657
+ // Create the remove boundary transducer (.|<L>:<>|<R>:<>)*
1658
+ /////////////////////////////////////////////////////////////
1659
+
1660
+ Transducer *rbt=pi_machine(TheAlphabet);
1661
+ root = rbt->root_node();
1662
+ root->add_arc( Label(leftm, Label::epsilon), root, rbt);
1663
+ root->add_arc( Label(rightm,Label::epsilon), root, rbt);
1664
+
1665
+ // Add the markers to the alphabet
1666
+ TheAlphabet.insert(Label(leftm));
1667
+ TheAlphabet.insert(Label(rightm));
1668
+
1669
+ /////////////////////////////////////////////////////////////
1670
+ // Create the constrain boundary transducer !(.*<L><R>.*)
1671
+ /////////////////////////////////////////////////////////////
1672
+
1673
+ Transducer *cbt=constrain_boundary_transducer(leftm, rightm);
1674
+
1675
+ /////////////////////////////////////////////////////////////
1676
+ // Create the extended context transducers
1677
+ /////////////////////////////////////////////////////////////
1678
+
1679
+ // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1680
+ Transducer *lct = replace_context(c->left, leftm, rightm);
1681
+
1682
+ // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1683
+ Transducer *tmp = &c->right->reverse();
1684
+ delete c->right;
1685
+ Transducer *t2 = replace_context(tmp, rightm, leftm);
1686
+ Transducer *rct = &t2->reverse();
1687
+ delete t2;
1688
+
1689
+ /////////////////////////////////////////////////////////////
1690
+ // unconditional replace transducer
1691
+ /////////////////////////////////////////////////////////////
1692
+
1693
+ Transducer *rt;
1694
+ if (type == repl_up || type == repl_right || type == repl_left)
1695
+ rt = replace_transducer( t, leftm, rightm, repl_up );
1696
+ else
1697
+ rt = replace_transducer( t, leftm, rightm, repl_down );
1698
+
1699
+ /////////////////////////////////////////////////////////////
1700
+ // build the conditional replacement transducer
1701
+ /////////////////////////////////////////////////////////////
1702
+
1703
+ tmp = ibt;
1704
+ tmp = &(*ibt || *cbt);
1705
+ delete(ibt);
1706
+ delete(cbt);
1707
+
1708
+ if (type == repl_up || type == repl_right) {
1709
+ t2 = &(*tmp || *rct);
1710
+ delete tmp;
1711
+ delete rct;
1712
+ tmp = t2;
1713
+ }
1714
+ if (type == repl_up || type == repl_left) {
1715
+ t2 = &(*tmp || *lct);
1716
+ delete tmp;
1717
+ delete lct;
1718
+ tmp = t2;
1719
+ }
1720
+
1721
+ t2 = &(*tmp || *rt);
1722
+ delete tmp;
1723
+ delete rt;
1724
+ tmp = t2;
1725
+
1726
+ if (type == repl_down || type == repl_right) {
1727
+ t2 = &(*tmp || *lct);
1728
+ delete tmp;
1729
+ delete lct;
1730
+ tmp = t2;
1731
+ }
1732
+ if (type == repl_down || type == repl_left) {
1733
+ t2 = &(*tmp || *rct);
1734
+ delete tmp;
1735
+ delete rct;
1736
+ tmp = t2;
1737
+ }
1738
+
1739
+ t2 = &(*tmp || *rbt);
1740
+ delete tmp;
1741
+ delete rbt;
1742
+
1743
+ // Remove the markers from the alphabet
1744
+ TheAlphabet.delete_markers();
1745
+
1746
+ if (optional)
1747
+ t2 = make_optional(t2);
1748
+
1749
+ free_contexts( c );
1750
+
1751
+ return t2;
1752
+ }
1753
+
1754
+
1755
+ /*******************************************************************/
1756
+ /* */
1757
+ /* add_alphabet */
1758
+ /* */
1759
+ /*******************************************************************/
1760
+
1761
+ void add_alphabet( Transducer *t )
1762
+
1763
+ {
1764
+ t->alphabet.copy(TheAlphabet);
1765
+ t->complete_alphabet();
1766
+ }
1767
+
1768
+
1769
+ /*******************************************************************/
1770
+ /* */
1771
+ /* store_transducer */
1772
+ /* */
1773
+ /*******************************************************************/
1774
+
1775
+ void store_transducer( Transducer *t, char *filename )
1776
+
1777
+ {
1778
+ if (filename == NULL)
1779
+ cout << *t;
1780
+ else {
1781
+ ofstream os(filename);
1782
+ os << *t;
1783
+ os.close();
1784
+ }
1785
+ }
1786
+
1787
+ /*******************************************************************/
1788
+ /* */
1789
+ /* write_to_file */
1790
+ /* */
1791
+ /*******************************************************************/
1792
+
1793
+ void write_to_file( Transducer *t, char *filename)
1794
+
1795
+ {
1796
+ FILE *file;
1797
+ if ((file = fopen(filename,"wb")) == NULL) {
1798
+ fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
1799
+ exit(1);
1800
+ }
1801
+
1802
+ t = explode(t);
1803
+ add_alphabet(t);
1804
+ t = minimise(t);
1805
+ t->store(file);
1806
+ fclose(file);
1807
+ }
1808
+
1809
+
1810
+ /*******************************************************************/
1811
+ /* */
1812
+ /* result */
1813
+ /* */
1814
+ /*******************************************************************/
1815
+
1816
+ Transducer *result( Transducer *t, bool switch_flag )
1817
+
1818
+ {
1819
+ t = explode(t);
1820
+
1821
+ // delete the variable values
1822
+ vector<char*> s;
1823
+ for( VarMap::iterator it=VM.begin(); it != VM.end(); it++ ) {
1824
+ s.push_back(it->first);
1825
+ delete it->second;
1826
+ it->second = NULL;
1827
+ }
1828
+ VM.clear();
1829
+ for( size_t i=0; i<s.size(); i++ )
1830
+ free(s[i]);
1831
+ s.clear();
1832
+
1833
+ if (switch_flag)
1834
+ t = switch_levels(t);
1835
+ add_alphabet(t);
1836
+ t = minimise(t);
1837
+ return t;
1838
+ }