ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -0,0 +1,1921 @@
1
+ /*******************************************************************/
2
+ /* */
3
+ /* FILE interface.C */
4
+ /* MODULE interface */
5
+ /* PROGRAM SFST */
6
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
7
+ /* */
8
+ /*******************************************************************/
9
+
10
+ #include <fstream>
11
+ #include <set>
12
+
13
+ #include "interface.h"
14
+
15
+ using std::ifstream;
16
+ using std::vector;
17
+
18
+ namespace SFST {
19
+
20
+ /*******************************************************************/
21
+ /* */
22
+ /* error */
23
+ /* */
24
+ /*******************************************************************/
25
+
26
+ void error( const char *message )
27
+
28
+ {
29
+ cerr << "\nError: " << message << "\naborted.\n";
30
+ exit(1);
31
+ }
32
+
33
+
34
+ /*******************************************************************/
35
+ /* */
36
+ /* error2 */
37
+ /* */
38
+ /*******************************************************************/
39
+
40
+ void error2( const char *message, char *input )
41
+
42
+ {
43
+ cerr << "\nError: " << message << ": " << input << "\naborted.\n";
44
+ exit(1);
45
+ }
46
+
47
+
48
+ /*******************************************************************/
49
+ /* */
50
+ /* Interface::symbol_code */
51
+ /* */
52
+ /*******************************************************************/
53
+
54
+ Character Interface::symbol_code( char *symbol )
55
+
56
+ {
57
+ int c=TheAlphabet.symbol2code(symbol);
58
+ if (c == EOF)
59
+ c = TheAlphabet.add_symbol( symbol );
60
+ free(symbol);
61
+ return (Character)c;
62
+ }
63
+
64
+
65
+ /*******************************************************************/
66
+ /* */
67
+ /* Interface::character_code */
68
+ /* */
69
+ /*******************************************************************/
70
+
71
+ Character Interface::character_code( unsigned int uc )
72
+
73
+ {
74
+ if (TheAlphabet.utf8)
75
+ return symbol_code(fst_strdup(int2utf8(uc)));
76
+
77
+ unsigned char *buffer=(unsigned char*)malloc(2);
78
+ buffer[0] = (unsigned char)uc;
79
+ buffer[1] = 0;
80
+
81
+ return symbol_code((char*)buffer);
82
+ }
83
+
84
+
85
+ /*******************************************************************/
86
+ /* */
87
+ /* Interface::add_value */
88
+ /* */
89
+ /*******************************************************************/
90
+
91
+ Range *Interface::add_value( Character c, Range *r )
92
+
93
+ {
94
+ Range *result=new Range;
95
+ result->character = c;
96
+ result->next = r;
97
+ return result;
98
+ }
99
+
100
+
101
+ /*******************************************************************/
102
+ /* */
103
+ /* Interface::add_values */
104
+ /* */
105
+ /*******************************************************************/
106
+
107
+ Range *Interface::add_values( unsigned int c1, unsigned int c2, Range *r )
108
+
109
+ {
110
+ for( unsigned int c=c2; c>=c1; c-- )
111
+ r = add_value(character_code(c), r);
112
+ return r;
113
+ }
114
+
115
+
116
+ /*******************************************************************/
117
+ /* */
118
+ /* Interface::append_values */
119
+ /* */
120
+ /*******************************************************************/
121
+
122
+ Range *Interface::append_values( Range *r2, Range *r )
123
+
124
+ {
125
+ if (r2 == NULL)
126
+ return r;
127
+ return add_value(r2->character, append_values(r2->next, r));
128
+ }
129
+
130
+
131
+ /*******************************************************************/
132
+ /* */
133
+ /* Interface::add_var_values */
134
+ /* */
135
+ /*******************************************************************/
136
+
137
+ Range *Interface::add_var_values( char *name, Range *r )
138
+
139
+ {
140
+ return append_values(svar_value(name), r);
141
+ }
142
+
143
+
144
+ /*******************************************************************/
145
+ /* */
146
+ /* Interface::in_range */
147
+ /* */
148
+ /*******************************************************************/
149
+
150
+ bool Interface::in_range( unsigned int c, Range *r )
151
+
152
+ {
153
+ while (r) {
154
+ if (r->character == c)
155
+ return true;
156
+ r = r->next;
157
+ }
158
+ return false;
159
+ }
160
+
161
+
162
+ /*******************************************************************/
163
+ /* */
164
+ /* free_values */
165
+ /* */
166
+ /*******************************************************************/
167
+
168
+ static void free_values( Range *r )
169
+
170
+ {
171
+ if (r) {
172
+ free_values(r->next);
173
+ delete r;
174
+ }
175
+ }
176
+
177
+
178
+ /*******************************************************************/
179
+ /* */
180
+ /* free_values */
181
+ /* */
182
+ /*******************************************************************/
183
+
184
+ static void free_values( Ranges *r )
185
+
186
+ {
187
+ if (r) {
188
+ free_values(r->next);
189
+ delete r;
190
+ }
191
+ }
192
+
193
+
194
+ /*******************************************************************/
195
+ /* */
196
+ /* free_contexts */
197
+ /* */
198
+ /*******************************************************************/
199
+
200
+ static void free_contexts( Contexts *c )
201
+
202
+ {
203
+ if (c) {
204
+ free_contexts(c->next);
205
+ delete c;
206
+ }
207
+ }
208
+
209
+
210
+ /*******************************************************************/
211
+ /* */
212
+ /* Interface::copy_values */
213
+ /* */
214
+ /*******************************************************************/
215
+
216
+ Range *Interface::copy_values( const Range *r )
217
+
218
+ {
219
+ if (r == NULL)
220
+ return NULL;
221
+ return add_value( r->character, copy_values(r->next));
222
+ }
223
+
224
+
225
+ /*******************************************************************/
226
+ /* */
227
+ /* Interface::complement_range */
228
+ /* */
229
+ /*******************************************************************/
230
+
231
+ Range *Interface::complement_range( Range *r )
232
+
233
+ {
234
+ vector<Character> sym;
235
+ for( Range *p=r; p; p=p->next)
236
+ sym.push_back( p->character );
237
+ free_values( r );
238
+
239
+ TheAlphabet.complement(sym);
240
+ if (sym.size() == 0)
241
+ error("Empty character range!");
242
+
243
+ Range *result=NULL;
244
+ for( size_t i=0; i<sym.size(); i++ ) {
245
+ Range *tmp = new Range;
246
+ tmp->character = sym[i];
247
+ tmp->next = result;
248
+ result = tmp;
249
+ }
250
+
251
+ return result;
252
+ }
253
+
254
+
255
+ /*******************************************************************/
256
+ /* */
257
+ /* Interface::make_transducer */
258
+ /* */
259
+ /*******************************************************************/
260
+
261
+ Transducer *Interface::make_transducer( Range *r1, Range *r2 )
262
+
263
+ {
264
+ Transducer *t=new Transducer();
265
+ Node *node=t->new_node();
266
+ node->set_final(1);
267
+
268
+ if (r1 == NULL || r2 == NULL) {
269
+ if (!Alphabet_Defined)
270
+ error("The wildcard symbol '.' requires the definition of an alphabet");
271
+
272
+ // one of the ranges was '.'
273
+ for(Alphabet::const_iterator it=TheAlphabet.begin();
274
+ it!=TheAlphabet.end(); it++)
275
+ if ((r1 == NULL || in_range(it->lower_char(), r1)) &&
276
+ (r2 == NULL || in_range(it->upper_char(), r2)))
277
+ t->root_node()->add_arc( *it, node, t );
278
+ }
279
+ else {
280
+ for (;;) {
281
+ Label l(r1->character, r2->character);
282
+ // TheAlphabet.insert(l);
283
+ t->root_node()->add_arc( l, node, t );
284
+ if (!r1->next && !r2->next)
285
+ break;
286
+ if (r1->next)
287
+ r1 = r1->next;
288
+ if (r2->next)
289
+ r2 = r2->next;
290
+ }
291
+ }
292
+
293
+ return t;
294
+ }
295
+
296
+
297
+ /*******************************************************************/
298
+ /* */
299
+ /* Interface::one_label_transducer */
300
+ /* */
301
+ /*******************************************************************/
302
+
303
+ Transducer *Interface::one_label_transducer( Label l )
304
+
305
+ {
306
+ Transducer *t = new Transducer();
307
+ Node *last = t->new_node();
308
+ t->root_node()->add_arc( l, last, t );
309
+ last->set_final(1);
310
+
311
+ return t;
312
+ }
313
+
314
+
315
+ /*******************************************************************/
316
+ /* */
317
+ /* Interface::new_transducer */
318
+ /* */
319
+ /*******************************************************************/
320
+
321
+ Transducer *Interface::new_transducer( Range *r1, Range *r2 )
322
+
323
+ {
324
+ Transducer *t=make_transducer( r1, r2);
325
+ if (r1 != r2)
326
+ free_values(r1);
327
+ free_values(r2);
328
+ return t;
329
+ }
330
+
331
+
332
+ /*******************************************************************/
333
+ /* */
334
+ /* Interface::read_words */
335
+ /* */
336
+ /*******************************************************************/
337
+
338
+ Transducer *Interface::read_words( char *filename )
339
+
340
+ {
341
+ if (Verbose)
342
+ fprintf(stderr,"\nreading words from %s...", filename);
343
+ ifstream is(filename);
344
+ if (!is.is_open()) {
345
+ static char message[1000];
346
+ sprintf(message,"Error: Cannot open file \"%s\"!", filename);
347
+ throw message;
348
+ }
349
+ free( filename );
350
+ Transducer *t = new Transducer(is, &TheAlphabet, Verbose, LexiconComments);
351
+ is.close();
352
+ TheAlphabet.insert_symbols(t->alphabet);
353
+ if (Verbose)
354
+ fprintf(stderr,"finished\n");
355
+ return t;
356
+ }
357
+
358
+
359
+ /*******************************************************************/
360
+ /* */
361
+ /* Interface::read_transducer */
362
+ /* */
363
+ /*******************************************************************/
364
+
365
+ Transducer *Interface::read_transducer( char *filename )
366
+
367
+ {
368
+ if (Verbose)
369
+ fprintf(stderr,"\nreading transducer from %s...", filename);
370
+ FILE *file = fopen(filename,"rb");
371
+ if (file == NULL) {
372
+ static char message[1000];
373
+ sprintf(message,"Error: Cannot open file \"%s\"!",filename);
374
+ throw message;
375
+ }
376
+ Transducer t(file);
377
+ fclose(file);
378
+ if (t.alphabet.utf8 != TheAlphabet.utf8) {
379
+ static char message[1000];
380
+ sprintf(message,"Error: incompatible character encoding in file \"%s\"!",
381
+ filename);
382
+ throw message;
383
+ }
384
+ free( filename );
385
+ // transfer the encoding og TheAlphabet to the transducer
386
+ Transducer *nt = &t.copy(false, &TheAlphabet);
387
+ TheAlphabet.insert_symbols(nt->alphabet);
388
+ if (Verbose)
389
+ fprintf(stderr,"finished\n");
390
+ return nt;
391
+ }
392
+
393
+
394
+ /*******************************************************************/
395
+ /* */
396
+ /* Interface::def_alphabet */
397
+ /* */
398
+ /*******************************************************************/
399
+
400
+ void Interface::def_alphabet( Transducer *t )
401
+
402
+ {
403
+ t = explode(t);
404
+ t = minimise(t);
405
+ t->alphabet.clear_char_pairs();
406
+ t->complete_alphabet();
407
+ TheAlphabet.clear_char_pairs();
408
+ TheAlphabet.copy(t->alphabet);
409
+ Alphabet_Defined = 1;
410
+ delete t;
411
+ }
412
+
413
+
414
+ /*******************************************************************/
415
+ /* */
416
+ /* Interface::def_svar definition of a value range variable */
417
+ /* */
418
+ /*******************************************************************/
419
+
420
+ bool Interface::def_svar( char *name, Range *r )
421
+
422
+ {
423
+ // delete the old value of the variable
424
+ SVarMap::iterator it=SVM.find(name);
425
+ if (it != SVM.end()) {
426
+ char *n=it->first;
427
+ Range *v=it->second;
428
+ SVM.erase(it);
429
+ delete v;
430
+ free(n);
431
+ }
432
+ SVM[name] = r;
433
+ return r == NULL;
434
+ }
435
+
436
+
437
+ /*******************************************************************/
438
+ /* */
439
+ /* Interface::svar_value */
440
+ /* */
441
+ /*******************************************************************/
442
+
443
+ Range *Interface::svar_value( char *name )
444
+
445
+ {
446
+ SVarMap::iterator it=SVM.find(name);
447
+ if (it == SVM.end())
448
+ error2("undefined variable", name);
449
+ free(name);
450
+ return copy_values(it->second);
451
+ }
452
+
453
+
454
+ /*******************************************************************/
455
+ /* */
456
+ /* Interface::rsvar_value */
457
+ /* */
458
+ /*******************************************************************/
459
+
460
+ Range *Interface::rsvar_value( char *name )
461
+
462
+ {
463
+ if (RSS.find(name) == RSS.end())
464
+ RSS.insert(fst_strdup(name));
465
+ return add_value(symbol_code(name), NULL);
466
+ }
467
+
468
+
469
+ /*******************************************************************/
470
+ /* */
471
+ /* Interface::def_var definition of a transducer variable */
472
+ /* */
473
+ /*******************************************************************/
474
+
475
+ bool Interface::def_var( char *name, Transducer *t )
476
+
477
+ {
478
+ // delete the old value of the variable
479
+ VarMap::iterator it=VM.find(name);
480
+ if (it != VM.end()) {
481
+ char *n=it->first;
482
+ Transducer *v=it->second;
483
+ VM.erase(it);
484
+ delete v;
485
+ free(n);
486
+ }
487
+
488
+ t = explode(t);
489
+ t = minimise(t);
490
+
491
+ VM[name] = t;
492
+ return t->is_empty();
493
+ }
494
+
495
+
496
+ /*******************************************************************/
497
+ /* */
498
+ /* Interface::def_rvar */
499
+ /* definition of an agreement variable for automata */
500
+ /* */
501
+ /*******************************************************************/
502
+
503
+ bool Interface::def_rvar( char *name, Transducer *t )
504
+
505
+ {
506
+ if (t->is_cyclic())
507
+ error2("cyclic transducer assigned to", name);
508
+ return def_var( name, t );
509
+ }
510
+
511
+
512
+ /*******************************************************************/
513
+ /* */
514
+ /* Interface::var_value */
515
+ /* */
516
+ /*******************************************************************/
517
+
518
+ Transducer *Interface::var_value( char *name )
519
+
520
+ {
521
+ VarMap::iterator it=VM.find(name);
522
+ if (it == VM.end())
523
+ error2("undefined variable", name);
524
+ free(name);
525
+ return &(it->second->copy());
526
+ }
527
+
528
+
529
+ /*******************************************************************/
530
+ /* */
531
+ /* Interface::rvar_value */
532
+ /* */
533
+ /*******************************************************************/
534
+
535
+ Transducer *Interface::rvar_value( char *name )
536
+
537
+ {
538
+ if (RS.find(name) == RS.end())
539
+ RS.insert(fst_strdup(name));
540
+ Range *r=add_value(symbol_code(name), NULL);
541
+ return new_transducer(r,r);
542
+ }
543
+
544
+
545
+ /*******************************************************************/
546
+ /* */
547
+ /* Interface::explode */
548
+ /* */
549
+ /*******************************************************************/
550
+
551
+ Transducer *Interface::explode( Transducer *t )
552
+
553
+ {
554
+ if (RS.size() == 0 && RSS.size() == 0)
555
+ return t;
556
+
557
+ t = minimise(t);
558
+
559
+ vector<char*> name;
560
+ for( RVarSet::iterator it=RS.begin(); it!=RS.end(); it++)
561
+ name.push_back(*it);
562
+ RS.clear();
563
+
564
+ // replace all agreement variables
565
+ for( size_t i=0; i<name.size(); i++ ) {
566
+ Transducer *nt = NULL;
567
+ Label l((Character)TheAlphabet.symbol2code(name[i]));
568
+ Transducer *vt=var_value(name[i]);
569
+
570
+ // enumerate all paths of the transducer
571
+ vector<Transducer*> it;
572
+ vt->enumerate_paths(it);
573
+ delete vt;
574
+
575
+ // insert each path
576
+ for( size_t i=0; i<it.size(); i++ ) {
577
+
578
+ // insertion
579
+ Transducer *t1 = &t->splice(l, it[i]);
580
+ delete it[i];
581
+
582
+ if (nt == NULL)
583
+ nt = t1;
584
+ else
585
+ nt = disjunction(nt, t1);
586
+ }
587
+ delete t;
588
+ t = nt;
589
+ }
590
+
591
+ name.clear();
592
+ for( RVarSet::iterator it=RSS.begin(); it!=RSS.end(); it++)
593
+ name.push_back(*it);
594
+ RSS.clear();
595
+
596
+ // replace all agreement variables
597
+ for( size_t i=0; i<name.size(); i++ ) {
598
+ Transducer *nt = NULL;
599
+ Character c=(Character)TheAlphabet.symbol2code(name[i]);
600
+ Range *r=svar_value(name[i]);
601
+
602
+ // insert each character
603
+ while (r != NULL) {
604
+
605
+ // insertion
606
+ Transducer *t1 = &t->replace_char(c, r->character);
607
+
608
+ if (nt == NULL)
609
+ nt = t1;
610
+ else
611
+ nt = disjunction(nt, t1);
612
+
613
+ Range *next = r->next;
614
+ delete r;
615
+ r = next;
616
+ }
617
+ delete t;
618
+ t = nt;
619
+ }
620
+
621
+ return t;
622
+ }
623
+
624
+
625
+ /*******************************************************************/
626
+ /* */
627
+ /* Interface::catenate */
628
+ /* */
629
+ /*******************************************************************/
630
+
631
+ Transducer *Interface::catenate( Transducer *t1, Transducer *t2 )
632
+
633
+ {
634
+ Transducer *t = &(*t1 + *t2);
635
+ delete t1;
636
+ delete t2;
637
+ return t;
638
+ }
639
+
640
+
641
+ /*******************************************************************/
642
+ /* */
643
+ /* Interface::add_range */
644
+ /* */
645
+ /*******************************************************************/
646
+
647
+ Ranges *Interface::add_range( Range *r, Ranges *l )
648
+
649
+ {
650
+ Ranges *result = new Ranges;
651
+ result->range = r;
652
+ result->next = l;
653
+ return result;
654
+ }
655
+
656
+
657
+ /*******************************************************************/
658
+ /* */
659
+ /* Interface::make_mapping */
660
+ /* */
661
+ /*******************************************************************/
662
+
663
+ Transducer *Interface::make_mapping( Ranges *list1, Ranges *list2 )
664
+
665
+ {
666
+ Ranges *l1=list1;
667
+ Ranges *l2=list2;
668
+ Transducer *t=new Transducer();
669
+
670
+ Node *node=t->root_node();
671
+ while (l1 && l2) {
672
+ Node *nn=t->new_node();
673
+ for( Range *r1=l1->range; r1; r1=r1->next )
674
+ for( Range *r2=l2->range; r2; r2=r2->next )
675
+ node->add_arc( Label(r1->character, r2->character), nn, t );
676
+ node = nn;
677
+ l1 = l1->next;
678
+ l2 = l2->next;
679
+ }
680
+ while (l1) {
681
+ Node *nn=t->new_node();
682
+ for( Range *r1=l1->range; r1; r1=r1->next )
683
+ node->add_arc( Label(r1->character, Label::epsilon), nn, t );
684
+ node = nn;
685
+ l1 = l1->next;
686
+ }
687
+ while (l2) {
688
+ Node *nn=t->new_node();
689
+ for( Range *r2=l2->range; r2; r2=r2->next )
690
+ node->add_arc( Label(Label::epsilon, r2->character), nn, t );
691
+ node = nn;
692
+ l2 = l2->next;
693
+ }
694
+ node->set_final(1);
695
+
696
+ free_values(list1);
697
+ free_values(list2);
698
+ return t;
699
+ }
700
+
701
+
702
+ /*******************************************************************/
703
+ /* */
704
+ /* Interface::disjunction */
705
+ /* */
706
+ /*******************************************************************/
707
+
708
+ Transducer *Interface::disjunction( Transducer *t1, Transducer *t2 )
709
+
710
+ {
711
+ Transducer *t = &(*t1 | *t2);
712
+ delete t1;
713
+ delete t2;
714
+ return t;
715
+ }
716
+
717
+
718
+ /*******************************************************************/
719
+ /* */
720
+ /* Interface::conjunction */
721
+ /* */
722
+ /*******************************************************************/
723
+
724
+ Transducer *Interface::conjunction( Transducer *t1, Transducer *t2 )
725
+
726
+ {
727
+ if (RS.size() > 0 || RSS.size() > 0)
728
+ cerr << "\nWarning: agreement operation inside of conjunction!\n";
729
+ Transducer *t = &(*t1 & *t2);
730
+ delete t1;
731
+ delete t2;
732
+ return t;
733
+ }
734
+
735
+
736
+ /*******************************************************************/
737
+ /* */
738
+ /* Interface::subtraction */
739
+ /* */
740
+ /*******************************************************************/
741
+
742
+ Transducer *Interface::subtraction( Transducer *t1, Transducer *t2 )
743
+
744
+ {
745
+ t1->alphabet.copy(TheAlphabet);
746
+
747
+ if (RS.size() > 0 || RSS.size() > 0)
748
+ cerr << "\nWarning: agreement operation inside of conjunction!\n";
749
+ Transducer *t = &(*t1 / *t2);
750
+ delete t1;
751
+ delete t2;
752
+ return t;
753
+ }
754
+
755
+
756
+ /*******************************************************************/
757
+ /* */
758
+ /* Interface::composition */
759
+ /* */
760
+ /*******************************************************************/
761
+
762
+ Transducer *Interface::composition( Transducer *t1, Transducer *t2 )
763
+
764
+ {
765
+ if (RS.size() > 0 || RSS.size() > 0)
766
+ cerr << "\nWarning: agreement operation inside of composition!\n";
767
+ Transducer *t = &(*t1 || *t2);
768
+ delete t1;
769
+ delete t2;
770
+ return t;
771
+ }
772
+
773
+ /*******************************************************************/
774
+ /* */
775
+ /* Interface::freely_insert */
776
+ /* */
777
+ /*******************************************************************/
778
+
779
+ Transducer *Interface::freely_insert( Transducer *t,
780
+ Character lc, Character uc )
781
+ {
782
+ return &t->freely_insert(Label(lc,uc));
783
+ }
784
+
785
+
786
+ /*******************************************************************/
787
+ /* */
788
+ /* Interface::negation */
789
+ /* */
790
+ /*******************************************************************/
791
+
792
+ Transducer *Interface::negation( Transducer *t )
793
+
794
+ {
795
+ if (RS.size() > 0 || RSS.size() > 0)
796
+ cerr << "\nWarning: agreement operation inside of negation!\n";
797
+ if (!Alphabet_Defined)
798
+ error("Negation requires the definition of an alphabet");
799
+ t->alphabet.clear_char_pairs();
800
+ t->alphabet.copy(TheAlphabet);
801
+ Transducer *nt = &(!*t);
802
+ delete t;
803
+ return nt;
804
+ }
805
+
806
+
807
+ /*******************************************************************/
808
+ /* */
809
+ /* Interface::upper_level */
810
+ /* */
811
+ /*******************************************************************/
812
+
813
+ Transducer *Interface::upper_level( Transducer *t )
814
+
815
+ {
816
+ Transducer *nt = &t->upper_level();
817
+ delete t;
818
+ return nt;
819
+ }
820
+
821
+
822
+ /*******************************************************************/
823
+ /* */
824
+ /* Interface::lower_level */
825
+ /* */
826
+ /*******************************************************************/
827
+
828
+ Transducer *Interface::lower_level( Transducer *t )
829
+
830
+ {
831
+ Transducer *nt = &t->lower_level();
832
+ delete t;
833
+ return nt;
834
+ }
835
+
836
+
837
+ /*******************************************************************/
838
+ /* */
839
+ /* Interface::minimise */
840
+ /* */
841
+ /*******************************************************************/
842
+
843
+ Transducer *Interface::minimise( Transducer *t )
844
+
845
+ {
846
+ t->alphabet.copy(TheAlphabet);
847
+ Transducer *nt = &t->minimise( Verbose );
848
+ delete t;
849
+ return nt;
850
+ }
851
+
852
+
853
+ /*******************************************************************/
854
+ /* */
855
+ /* Interface::switch_levels */
856
+ /* */
857
+ /*******************************************************************/
858
+
859
+ Transducer *Interface::switch_levels( Transducer *t )
860
+
861
+ {
862
+ Transducer *nt = &t->switch_levels();
863
+ delete t;
864
+ return nt;
865
+ }
866
+
867
+
868
+ /*******************************************************************/
869
+ /* */
870
+ /* Interface::repetition */
871
+ /* */
872
+ /*******************************************************************/
873
+
874
+ Transducer *Interface::repetition( Transducer *t )
875
+
876
+ {
877
+ Transducer *nt = &(t->kleene_star());
878
+ delete t;
879
+ return nt;
880
+ }
881
+
882
+
883
+ /*******************************************************************/
884
+ /* */
885
+ /* Interface::repetition2 */
886
+ /* */
887
+ /*******************************************************************/
888
+
889
+ Transducer *Interface::repetition2( Transducer *t )
890
+
891
+ {
892
+ Transducer *t1 = &(t->kleene_star());
893
+ Transducer *nt = &(*t + *t1);
894
+ delete t;
895
+ delete t1;
896
+ return nt;
897
+ }
898
+
899
+
900
+ /*******************************************************************/
901
+ /* */
902
+ /* Interface::optional */
903
+ /* */
904
+ /*******************************************************************/
905
+
906
+ Transducer *Interface::optional( Transducer *t )
907
+
908
+ {
909
+ Transducer *nt = &(t->copy());
910
+ nt->root_node()->set_final(1);
911
+ delete t;
912
+ return nt;
913
+ }
914
+
915
+
916
+ /*******************************************************************/
917
+ /* */
918
+ /* Interface::add_pi_transitions */
919
+ /* */
920
+ /*******************************************************************/
921
+
922
+ void Interface::add_pi_transitions( Transducer *t, Node *node, Alphabet &alph)
923
+
924
+ {
925
+ for( Alphabet::const_iterator it=alph.begin(); it!=alph.end(); it++) {
926
+ Label l = *it;
927
+ node->add_arc( l, node, t );
928
+ }
929
+ }
930
+
931
+
932
+ /*******************************************************************/
933
+ /* */
934
+ /* Interface::pi_machine */
935
+ /* */
936
+ /*******************************************************************/
937
+
938
+ Transducer *Interface::pi_machine( Alphabet &alph )
939
+
940
+ {
941
+ Transducer *t=new Transducer();
942
+ t->root_node()->set_final(1);
943
+ add_pi_transitions( t, t->root_node(), alph );
944
+ return t;
945
+ }
946
+
947
+
948
+ /*******************************************************************/
949
+ /* */
950
+ /* Interface::empty_string_transducer */
951
+ /* */
952
+ /*******************************************************************/
953
+
954
+ Transducer *Interface::empty_string_transducer( void )
955
+
956
+ {
957
+ Transducer *t=new Transducer();
958
+ t->root_node()->set_final(1);
959
+ return t;
960
+ }
961
+
962
+
963
+ /*******************************************************************/
964
+ /* */
965
+ /* Interface::cp */
966
+ /* */
967
+ /*******************************************************************/
968
+
969
+ Transducer *Interface::cp( Range *lower_range, Range *upper_range )
970
+
971
+ {
972
+ Transducer *t = make_transducer(lower_range, upper_range);
973
+ for( ArcsIter p(t->root_node()->arcs()); p; p++ ) {
974
+ Arc *arc=p;
975
+ if (TheAlphabet.find(arc->label()) == TheAlphabet.end())
976
+ fprintf(stderr,"Warning: 2-level rule mapping \"%s\" not defined in alphabet!\n",
977
+ TheAlphabet.write_label(arc->label()));
978
+ }
979
+
980
+ return t;
981
+ }
982
+
983
+
984
+ /*******************************************************************/
985
+ /* */
986
+ /* Interface::anti_cp */
987
+ /* */
988
+ /*******************************************************************/
989
+
990
+ Transducer *Interface::anti_cp( Range *lower_range, Range *upper_range )
991
+
992
+ {
993
+ Transducer *cpt = cp(lower_range, upper_range);
994
+ Transducer *t=new Transducer();
995
+ Node *node=t->new_node();
996
+
997
+ node->set_final(1);
998
+ for(Alphabet::const_iterator it=TheAlphabet.begin();
999
+ it!=TheAlphabet.end(); it++){
1000
+ Label l=*it;
1001
+ if (in_range(l.lower_char(), lower_range) &&
1002
+ !cpt->root_node()->target_node(l))
1003
+ t->root_node()->add_arc( l, node, t );
1004
+ }
1005
+ if (in_range(Label::epsilon, lower_range) &&
1006
+ !cpt->root_node()->target_node(Label()))
1007
+ t->root_node()->add_arc( Label(), node, t );
1008
+
1009
+ delete cpt;
1010
+ return t;
1011
+ }
1012
+
1013
+
1014
+ /*******************************************************************/
1015
+ /* */
1016
+ /* Interface::twol_right_rule */
1017
+ /* */
1018
+ /*******************************************************************/
1019
+
1020
+ Transducer *Interface::twol_right_rule( Transducer *lc, Range *lower_range,
1021
+ Range *upper_range, Transducer *rc )
1022
+ {
1023
+ // Build the rule transducer
1024
+ Transducer *cpt = cp(lower_range, upper_range);
1025
+ Transducer *pi=pi_machine(TheAlphabet);
1026
+
1027
+ // First unwanted language
1028
+
1029
+ lc->alphabet.copy(TheAlphabet);
1030
+ Transducer *notlc = &(!*lc);
1031
+ Transducer *tmp = &(*notlc + *cpt);
1032
+ delete notlc;
1033
+ Transducer *t1 = &(*tmp + *pi);
1034
+ delete tmp;
1035
+
1036
+ // Second unwanted language
1037
+ rc->alphabet.copy(TheAlphabet);
1038
+ Transducer *notrc = &(!*rc);
1039
+ tmp = &(*cpt + *notrc);
1040
+ delete cpt;
1041
+ delete notrc;
1042
+ Transducer *t2 = &(*pi + *tmp);
1043
+ delete pi;
1044
+ delete tmp;
1045
+
1046
+ tmp = &(*t1|*t2);
1047
+ delete t1;
1048
+ delete t2;
1049
+
1050
+ tmp->alphabet.copy(TheAlphabet);
1051
+ t1 = &(!*tmp);
1052
+ delete tmp;
1053
+
1054
+ return t1;
1055
+ }
1056
+
1057
+
1058
+ /*******************************************************************/
1059
+ /* */
1060
+ /* Interface::twol_left_rule */
1061
+ /* */
1062
+ /*******************************************************************/
1063
+
1064
+ Transducer *Interface::twol_left_rule( Transducer *lc, Range *lower_range,
1065
+ Range *upper_range, Transducer *rc )
1066
+ {
1067
+ // check for problematic insertion operations like "$L <> <= a $R"
1068
+ // where either $L or $R includes the empty string
1069
+ if (in_range(Label::epsilon, lower_range)) {
1070
+ if (lc->generates_empty_string())
1071
+ error("in two level rule: insertion operation with deletable left context!");
1072
+ if (rc->generates_empty_string())
1073
+ error("in two level rule: insertion operation with deletable right context!");
1074
+ cerr << "\nWarning: two level rule used for insertion operation (might produce unexpected results)\n";
1075
+ }
1076
+
1077
+ // Build the rule transducer
1078
+ Transducer *t1 = anti_cp(lower_range, upper_range);
1079
+
1080
+ // Add the left context;
1081
+ Transducer *t2 = &(*lc + *t1);
1082
+ delete t1;
1083
+
1084
+ // Add the right context;
1085
+ t1 = &(*t2 + *rc);
1086
+ delete t2;
1087
+
1088
+ // Form the complement
1089
+ t1->alphabet.copy(TheAlphabet);
1090
+ t2 = &(!*t1);
1091
+ delete t1;
1092
+
1093
+ return t2;
1094
+ }
1095
+
1096
+
1097
+ /*******************************************************************/
1098
+ /* */
1099
+ /* Interface::make_rule */
1100
+ /* */
1101
+ /*******************************************************************/
1102
+
1103
+ Transducer *Interface::make_rule( Transducer *lc, Range *lower_range,
1104
+ Twol_Type type, Range *upper_range,
1105
+ Transducer *rc )
1106
+ {
1107
+ if (RS.size() > 0 || RSS.size() > 0)
1108
+ cerr << "\nWarning: agreement operation inside of replacement rule!\n";
1109
+
1110
+ if (!Alphabet_Defined)
1111
+ error("Two level rules require the definition of an alphabet");
1112
+
1113
+ // expand the left and the right contexts to their full length
1114
+ Transducer *pi=pi_machine(TheAlphabet);
1115
+
1116
+ if (lc == NULL)
1117
+ lc = pi_machine(TheAlphabet);
1118
+ else {
1119
+ Transducer *tmp = &(*pi + *lc);
1120
+ delete lc;
1121
+ lc = tmp;
1122
+ }
1123
+ if (rc == NULL)
1124
+ rc = pi_machine(TheAlphabet);
1125
+ else {
1126
+ Transducer *tmp = &(*rc + *pi);
1127
+ delete rc;
1128
+ rc = tmp;
1129
+ }
1130
+ delete pi;
1131
+
1132
+ Transducer *result = NULL;
1133
+
1134
+ switch (type) {
1135
+ case twol_left:
1136
+ result = twol_left_rule(lc, lower_range, upper_range, rc);
1137
+ break;
1138
+ case twol_right:
1139
+ result = twol_right_rule(lc, lower_range, upper_range, rc);
1140
+ break;
1141
+ case twol_both:
1142
+ {
1143
+ Transducer *t1 = twol_left_rule(lc, lower_range, upper_range, rc);
1144
+ Transducer *t2 = twol_right_rule(lc, lower_range, upper_range, rc);
1145
+ result = &(*t1 & *t2);
1146
+ delete t1;
1147
+ delete t2;
1148
+ }
1149
+ }
1150
+ delete lc;
1151
+ delete rc;
1152
+ if (lower_range != upper_range)
1153
+ free_values(lower_range);
1154
+ free_values(upper_range);
1155
+
1156
+ return minimise(result);
1157
+ }
1158
+
1159
+
1160
+ /*******************************************************************/
1161
+ /* */
1162
+ /* Interface::make_context */
1163
+ /* */
1164
+ /*******************************************************************/
1165
+
1166
+ Contexts *Interface::make_context( Transducer *l, Transducer *r )
1167
+
1168
+ {
1169
+ if (l == NULL)
1170
+ l = empty_string_transducer();
1171
+ if (r == NULL)
1172
+ r = empty_string_transducer();
1173
+
1174
+ Contexts *c=new Contexts();
1175
+ c->left = l;
1176
+ c->right = r;
1177
+ c->next = NULL;
1178
+
1179
+ return c;
1180
+ }
1181
+
1182
+
1183
+ /*******************************************************************/
1184
+ /* */
1185
+ /* Interface::add_context */
1186
+ /* */
1187
+ /*******************************************************************/
1188
+
1189
+ Contexts *Interface::add_context( Contexts *nc, Contexts *c )
1190
+
1191
+ {
1192
+ nc->next = c;
1193
+ return nc;
1194
+ }
1195
+
1196
+
1197
+ /*******************************************************************/
1198
+ /* */
1199
+ /* Interface::restriction_transducer */
1200
+ /* */
1201
+ /*******************************************************************/
1202
+
1203
+ Transducer *Interface::restriction_transducer( Transducer *l1, Transducer *l2,
1204
+ Character marker )
1205
+ {
1206
+ l1->alphabet.copy(TheAlphabet);
1207
+ Transducer *t1 = &(*l1 / *l2);
1208
+
1209
+ Transducer *t2 = &t1->replace_char(marker, Label::epsilon);
1210
+ delete t1;
1211
+
1212
+ t2->alphabet.copy(TheAlphabet);
1213
+ t1 = &(!*t2);
1214
+ delete t2;
1215
+
1216
+ return t1;
1217
+ }
1218
+
1219
+
1220
+ /*******************************************************************/
1221
+ /* */
1222
+ /* Interface::marker_transducer */
1223
+ /* */
1224
+ /*******************************************************************/
1225
+
1226
+ Transducer *Interface::marker_transducer( Transducer *t, Contexts *c,
1227
+ Character &marker )
1228
+ {
1229
+ marker = TheAlphabet.new_marker();
1230
+ Transducer *result = one_label_transducer( Label(marker) );
1231
+
1232
+ // build the alphabet with a new marker
1233
+ result->alphabet.insert_symbols(t->alphabet);
1234
+ while (c) {
1235
+ result->alphabet.insert_symbols(c->left->alphabet);
1236
+ result->alphabet.insert_symbols(c->right->alphabet);
1237
+ c = c->next;
1238
+ }
1239
+
1240
+ return result;
1241
+ }
1242
+
1243
+
1244
+ /*******************************************************************/
1245
+ /* */
1246
+ /* Interface::center_transducer */
1247
+ /* */
1248
+ /*******************************************************************/
1249
+
1250
+ Transducer *Interface::center_transducer( Transducer *t, Transducer *pi,
1251
+ Transducer *mt )
1252
+ {
1253
+ // create the concatenation pi + mt + *t + mt + pi
1254
+ Transducer *t1=&(*pi + *mt);
1255
+ Transducer *t2=&(*t1 + *t);
1256
+ delete t1;
1257
+ t1 = &(*t2 + *mt);
1258
+ delete t2;
1259
+ t2 = &(*t1 + *pi);
1260
+ delete t1;
1261
+ return t2;
1262
+ }
1263
+
1264
+
1265
+ /*******************************************************************/
1266
+ /* */
1267
+ /* Interface::context_transducer */
1268
+ /* */
1269
+ /*******************************************************************/
1270
+
1271
+ Transducer *Interface::context_transducer( Transducer *t, Transducer *pi,
1272
+ Transducer *mt, Contexts *c )
1273
+ {
1274
+ // pi + left[i] + mt + pi + mt + right[i] + pi
1275
+
1276
+ Transducer *t1 = &(*mt + *t);
1277
+ Transducer *tmp = &(*t1 + *mt);
1278
+ delete t1;
1279
+ Transducer *result=NULL;
1280
+
1281
+ while (c) {
1282
+ t1 = &(*pi + *c->left);
1283
+ Transducer *t2 = &(*t1 + *tmp);
1284
+ delete t1;
1285
+ t1 = &(*t2 + *c->right);
1286
+ delete t2;
1287
+ t2 = &(*t1 + *pi);
1288
+ delete t1;
1289
+
1290
+ if (result) {
1291
+ t1 = &(*result | *t2);
1292
+ delete t2;
1293
+ result = t1;
1294
+ }
1295
+ else
1296
+ result = t2;
1297
+
1298
+ c = c->next;
1299
+ }
1300
+ delete tmp;
1301
+
1302
+ return result;
1303
+ }
1304
+
1305
+
1306
+
1307
+ /*******************************************************************/
1308
+ /* */
1309
+ /* Interface::result_transducer */
1310
+ /* */
1311
+ /*******************************************************************/
1312
+
1313
+ Transducer *Interface::result_transducer( Transducer *l1, Transducer *l2,
1314
+ Twol_Type type, Character marker )
1315
+ {
1316
+ Transducer *result=NULL;
1317
+ if (type == twol_right)
1318
+ result = restriction_transducer( l1, l2, marker );
1319
+ else if (type == twol_left)
1320
+ result = restriction_transducer( l2, l1, marker );
1321
+ else if (type == twol_both) {
1322
+ Transducer *t1 = restriction_transducer( l1, l2, marker );
1323
+ Transducer *t2 = restriction_transducer( l2, l1, marker );
1324
+ result = &(*t1 & *t2);
1325
+ delete t1;
1326
+ delete t2;
1327
+ }
1328
+
1329
+ return result;
1330
+ }
1331
+
1332
+
1333
+ /*******************************************************************/
1334
+ /* */
1335
+ /* Interface::restriction */
1336
+ /* */
1337
+ /*******************************************************************/
1338
+
1339
+ Transducer *Interface::restriction( Transducer *t, Twol_Type type,
1340
+ Contexts *c, int direction )
1341
+ {
1342
+ Character marker;
1343
+ Transducer *mt=marker_transducer( t, c, marker );
1344
+ Transducer *pi=pi_machine(TheAlphabet);
1345
+ Transducer *l1=center_transducer( t, pi, mt );
1346
+
1347
+ Transducer *tmp;
1348
+ if (direction == 0)
1349
+ tmp = pi;
1350
+ else if (direction == 1) {
1351
+ // compute _t || .*
1352
+ Transducer *t1 = &t->lower_level();
1353
+ tmp = &(*t1 || *pi);
1354
+ delete t1;
1355
+ }
1356
+ else {
1357
+ // compute ^t || .*
1358
+ Transducer *t1 = &t->upper_level();
1359
+ tmp = &(*pi || *t1);
1360
+ delete t1;
1361
+ }
1362
+ delete t;
1363
+
1364
+ Transducer *l2=context_transducer( tmp, pi, mt, c );
1365
+ if (tmp != pi)
1366
+ delete tmp;
1367
+ delete pi;
1368
+ delete mt;
1369
+
1370
+ Transducer *result=result_transducer( l1, l2, type, marker );
1371
+ delete l1;
1372
+ delete l2;
1373
+
1374
+ free_contexts( c );
1375
+
1376
+ return result;
1377
+ }
1378
+
1379
+
1380
+ /*******************************************************************/
1381
+ /* */
1382
+ /* Interface::insert_boundary_transducer */
1383
+ /* */
1384
+ /*******************************************************************/
1385
+
1386
+ Transducer *Interface::insert_boundary_transducer( Character leftm, Character rightm,
1387
+ Alphabet &alph )
1388
+ {
1389
+ // Create the insert boundaries transducer (.|<>:<L>|<>:<R>)*
1390
+
1391
+ Transducer *result=pi_machine( alph );
1392
+ Node *root=result->root_node();
1393
+ root->add_arc( Label(Label::epsilon, leftm), root, result);
1394
+ root->add_arc( Label(Label::epsilon, rightm),root, result);
1395
+
1396
+ return result;
1397
+ }
1398
+
1399
+
1400
+ /*******************************************************************/
1401
+ /* */
1402
+ /* Interface::remove_boundary_transducer */
1403
+ /* */
1404
+ /*******************************************************************/
1405
+
1406
+ Transducer *Interface::remove_boundary_transducer( Character leftm, Character rightm,
1407
+ Alphabet &alph )
1408
+ {
1409
+ // Create the remove boundaries transducer (.|<L>:<>|<R>:<>)*
1410
+
1411
+ Transducer *result=pi_machine( alph );
1412
+ Node *root = result->root_node();
1413
+ root->add_arc( Label(leftm, Label::epsilon), root, result);
1414
+ root->add_arc( Label(rightm,Label::epsilon), root, result);
1415
+
1416
+ return result;
1417
+ }
1418
+
1419
+
1420
+ /*******************************************************************/
1421
+ /* */
1422
+ /* Interface::constrain_boundary_transducer */
1423
+ /* */
1424
+ /*******************************************************************/
1425
+
1426
+ Transducer *Interface::constrain_boundary_transducer( Character leftm,
1427
+ Character rightm,
1428
+ Alphabet &alph)
1429
+ {
1430
+ // create the transducer (.|<L>|<R>)*
1431
+
1432
+ Transducer *tmp=pi_machine( alph );
1433
+
1434
+ // create the transducer (.|<L>|<R>)* <L><R> (.|<L>|<R>)*
1435
+ Node *root = tmp->root_node();
1436
+ Node *node = tmp->new_node();
1437
+ Node *last = tmp->new_node();
1438
+
1439
+ root->set_final(0);
1440
+ last->set_final(1);
1441
+
1442
+ root->add_arc( Label(leftm), node, tmp);
1443
+ node->add_arc( Label(rightm), last, tmp);
1444
+
1445
+ add_pi_transitions( tmp, last, alph );
1446
+
1447
+ // create the transducer !((.|<L>|<R>)* <L><R> (.|<L>|<R>)*)
1448
+ tmp->alphabet.copy(alph);
1449
+ Transducer *result = &(!*tmp);
1450
+ delete tmp;
1451
+
1452
+ return result;
1453
+ }
1454
+
1455
+
1456
+ /*******************************************************************/
1457
+ /* */
1458
+ /* Interface::extended_left_transducer */
1459
+ /* */
1460
+ /*******************************************************************/
1461
+
1462
+ Transducer *Interface::extended_left_transducer( Transducer *t, Character m1,
1463
+ Character m2, Alphabet &alpha )
1464
+ {
1465
+ if (t == NULL) // empty context
1466
+ return pi_machine(alpha);
1467
+
1468
+ // Extended left context transducer
1469
+
1470
+ // <R> >> (<L> >> $T$)
1471
+ Transducer *tmp=&t->freely_insert( Label(m1) );
1472
+ delete t;
1473
+ t = &tmp->freely_insert( Label(m2) );
1474
+ delete tmp;
1475
+
1476
+ // .* (<R> >> (<L> >> $T$))
1477
+ add_pi_transitions( t, t->root_node(), alpha );
1478
+
1479
+ // !(.*<L>)
1480
+ tmp = one_label_transducer(Label(m1));
1481
+ add_pi_transitions( tmp, tmp->root_node(), alpha );
1482
+ tmp->alphabet.copy(alpha);
1483
+ Transducer *t2 = &(!*tmp);
1484
+ delete tmp;
1485
+
1486
+ // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1487
+ tmp = &(*t || *t2);
1488
+ delete t;
1489
+ delete t2;
1490
+
1491
+ return tmp;
1492
+ }
1493
+
1494
+
1495
+ /*******************************************************************/
1496
+ /* */
1497
+ /* Interface::left_context */
1498
+ /* */
1499
+ /*******************************************************************/
1500
+
1501
+ Transducer *Interface::left_context( Transducer *t, Character leftm,
1502
+ Character rightm, Alphabet &alph )
1503
+ {
1504
+ // .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1505
+ Transducer *ct = extended_left_transducer(t, leftm, rightm, alph);
1506
+
1507
+ // <L>
1508
+ Transducer *mt = one_label_transducer(Label(leftm));
1509
+ // <R>* <L>
1510
+ mt->root_node()->add_arc(Label(rightm), mt->root_node(), mt );
1511
+ // <R>* <L> .*
1512
+ add_pi_transitions(mt, mt->root_node()->target_node(Label(leftm)), alph);
1513
+
1514
+ ct->alphabet.copy( alph );
1515
+ Transducer *no_ct = &!*ct;
1516
+
1517
+ mt->alphabet.copy(alph);
1518
+ Transducer *no_mt = &!*mt;
1519
+
1520
+ Transducer *t1 = &(*no_ct + *mt);
1521
+ delete no_ct;
1522
+ delete mt;
1523
+
1524
+ Transducer *t2 = &(*ct + *no_mt);
1525
+ delete ct;
1526
+ delete no_mt;
1527
+
1528
+ Transducer *tmp = &(*t1 | *t2);
1529
+ delete t1;
1530
+ delete t2;
1531
+
1532
+ tmp->alphabet.copy( alph );
1533
+ t1 = &!*tmp;
1534
+ delete tmp;
1535
+
1536
+ return t1;
1537
+ }
1538
+
1539
+
1540
+ /*******************************************************************/
1541
+ /* */
1542
+ /* Interface::right_context */
1543
+ /* */
1544
+ /*******************************************************************/
1545
+
1546
+ Transducer *Interface::right_context( Transducer *t, Character leftm,
1547
+ Character rightm, Alphabet &alph )
1548
+ {
1549
+ // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1550
+ Transducer *tmp = &t->reverse();
1551
+ delete t;
1552
+ Transducer *t2 = left_context(tmp, rightm, leftm, alph);
1553
+ Transducer *result = &t2->reverse();
1554
+ delete t2;
1555
+ return result;
1556
+ }
1557
+
1558
+
1559
+ /*******************************************************************/
1560
+ /* */
1561
+ /* Interface::make_optional */
1562
+ /* */
1563
+ /*******************************************************************/
1564
+
1565
+ Transducer *Interface::make_optional( Transducer *t, Repl_Type type )
1566
+
1567
+ {
1568
+ Transducer *t1;
1569
+ if (type == my_repl_down)
1570
+ t1 = &t->upper_level();
1571
+ else
1572
+ t1 = &t->lower_level();
1573
+
1574
+ Transducer *t2 = &(*t | *t1);
1575
+
1576
+ delete t;
1577
+ delete t1;
1578
+
1579
+ return t2;
1580
+ }
1581
+
1582
+
1583
+ /*******************************************************************/
1584
+ /* */
1585
+ /* Interface::replace */
1586
+ /* */
1587
+ /*******************************************************************/
1588
+
1589
+ Transducer *Interface::replace( Transducer *ct, Repl_Type type,
1590
+ bool optional )
1591
+ {
1592
+ if (optional)
1593
+ ct = make_optional(ct, type);
1594
+
1595
+ // compute the no-center transducer
1596
+ Transducer *t1=NULL;
1597
+
1598
+ Transducer *pi = pi_machine(TheAlphabet);
1599
+ if (type == repl_up) {
1600
+ // _ct || .*
1601
+ Transducer *t2 = &ct->lower_level();
1602
+ t1 = &(*t2 || *pi);
1603
+ delete t2;
1604
+ }
1605
+ else if (type == my_repl_down) {
1606
+ // .* || ^ct
1607
+ Transducer *t2 = &ct->upper_level();
1608
+ t1 = &(*pi || *t2);
1609
+ delete t2;
1610
+ }
1611
+ else
1612
+ error("Invalid type of replace operator");
1613
+
1614
+ {
1615
+ // _ct without empty string
1616
+ Transducer *t2 = empty_string_transducer();
1617
+ Transducer *t3 = &(*t1 / *t2);
1618
+ delete t1;
1619
+ delete t2;
1620
+ t1 = t3;
1621
+ }
1622
+
1623
+ // .* _ct
1624
+ Transducer *t2 = &(*pi + *t1);
1625
+ delete t1;
1626
+
1627
+ // .* _ct .*
1628
+ t1 = &(*t2 + *pi);
1629
+ delete pi;
1630
+ delete t2;
1631
+
1632
+ // no_ct = !(.* _ct .*)
1633
+ t1->alphabet.copy(TheAlphabet);
1634
+ Transducer *no_ct = &(!*t1);
1635
+ delete t1;
1636
+
1637
+ // compute the unconditional replacement transducer
1638
+
1639
+ // no-ct ct
1640
+ t1 = &(*no_ct + *ct);
1641
+ delete ct;
1642
+
1643
+ // (no-ct ct)*
1644
+ t2 = &(t1->kleene_star());
1645
+ delete t1;
1646
+
1647
+ // (no-ct ct)* no-ct
1648
+ t1 = &(*t2 + *no_ct);
1649
+ delete t2;
1650
+ delete no_ct;
1651
+
1652
+ return t1;
1653
+ }
1654
+
1655
+
1656
+ /*******************************************************************/
1657
+ /* */
1658
+ /* Interface::replace_transducer */
1659
+ /* */
1660
+ /*******************************************************************/
1661
+
1662
+ Transducer *Interface::replace_transducer( Transducer *ct, Character lm,
1663
+ Character rm, Repl_Type type )
1664
+ {
1665
+ // insert boundary markers into the center transducer
1666
+
1667
+ // <L> >> (<R> >> $Center$)
1668
+ Transducer *tmp = &ct->freely_insert(Label(lm));
1669
+ delete ct;
1670
+ ct = &tmp->freely_insert(Label(rm));
1671
+ delete tmp;
1672
+
1673
+ // add surrounding boundary markers to the center transducer
1674
+
1675
+ // <L> (<L> >> (<R> >> $Center$))
1676
+ Transducer *t2 = one_label_transducer( Label(lm) );
1677
+ tmp = &(*t2 + *ct);
1678
+ delete t2;
1679
+ delete ct;
1680
+
1681
+ // $CenterB$ = <L> (<L> >> (<R> >> $Center$)) <R>
1682
+ t2 = one_label_transducer( Label(rm) );
1683
+ ct = &(*tmp + *t2);
1684
+ delete tmp;
1685
+ delete t2;
1686
+
1687
+ return replace(ct, type, false);
1688
+ }
1689
+
1690
+
1691
+ /*******************************************************************/
1692
+ /* */
1693
+ /* Interface::replace_in_context */
1694
+ /* */
1695
+ /*******************************************************************/
1696
+
1697
+ Transducer *Interface::replace_in_context( Transducer *t, Repl_Type type,
1698
+ Contexts *c, bool optional )
1699
+ {
1700
+ if (optional)
1701
+ t = make_optional(t, type);
1702
+
1703
+ // The implementation of the replace operators is based on
1704
+ // "The Replace Operator" by Lauri Karttunen
1705
+
1706
+ if (!Alphabet_Defined)
1707
+ error("The replace operators require the definition of an alphabet");
1708
+
1709
+ if (!c->left->is_automaton() || !c->right->is_automaton())
1710
+ error("The replace operators require automata as context expressions! (Do not include any character mappings x:y between the two parentheses of the operator.)");
1711
+
1712
+ if (type == my_repl_down) {
1713
+ Transducer *t2 = empty_string_transducer();
1714
+ Transducer *t3 = &(*t || *t2);
1715
+ if (!t3->is_empty())
1716
+ cerr << "\nWarning: The source of the replace operation contains the empty string! (Such insertion operations do not work.)\n";
1717
+ delete t2;
1718
+ delete t3;
1719
+ }
1720
+ else {
1721
+ Transducer *t2 = empty_string_transducer();
1722
+ Transducer *t3 = &(*t2 || *t);
1723
+ if (!t3->is_empty())
1724
+ cerr << "\nWarning: The source of the replace operation contains the empty string! (Such insertion operations do not work.)\n";
1725
+ delete t2;
1726
+ delete t3;
1727
+ }
1728
+
1729
+ // create the marker symbols
1730
+ Character leftm = TheAlphabet.new_marker();
1731
+ Character rightm = TheAlphabet.new_marker();
1732
+
1733
+ // create the upper and lower alphabets
1734
+ Alphabet lower_alph;
1735
+ lower_alph.copy( TheAlphabet, lower );
1736
+ Alphabet upper_alph;
1737
+ upper_alph.copy( TheAlphabet, upper );
1738
+
1739
+ /////////////////////////////////////////////////////////////
1740
+ // Create the insert boundaries transducer (.|<>:<L>|<>:<R>)*
1741
+ /////////////////////////////////////////////////////////////
1742
+
1743
+ Transducer *tmp=insert_boundary_transducer( leftm, rightm, lower_alph );
1744
+
1745
+ /////////////////////////////////////////////////////////////
1746
+ // Create the remove boundaries transducer (.|<L>:<>|<R>:<>)*
1747
+ /////////////////////////////////////////////////////////////
1748
+
1749
+ Transducer *rbt=remove_boundary_transducer( leftm, rightm, upper_alph );
1750
+
1751
+ // Add the markers to the alphabet
1752
+ TheAlphabet.insert(Label(leftm));
1753
+ TheAlphabet.insert(Label(rightm));
1754
+ lower_alph.insert(Label(leftm));
1755
+ lower_alph.insert(Label(rightm));
1756
+ upper_alph.insert(Label(leftm));
1757
+ upper_alph.insert(Label(rightm));
1758
+
1759
+ /////////////////////////////////////////////////////////////
1760
+ // unconditional replace transducer
1761
+ /////////////////////////////////////////////////////////////
1762
+
1763
+ Transducer *rt;
1764
+ if (type == my_repl_down)
1765
+ rt = replace_transducer( t, leftm, rightm, my_repl_down );
1766
+ else
1767
+ rt = replace_transducer( t, leftm, rightm, repl_up );
1768
+
1769
+ /////////////////////////////////////////////////////////////
1770
+ // build the conditional replacement transducer
1771
+ /////////////////////////////////////////////////////////////
1772
+
1773
+ if (type != my_repl_down) {
1774
+ // Create the constrain boundaries transducer !(.*<L><R>.*)
1775
+ Transducer *cbt=constrain_boundary_transducer(leftm, rightm, lower_alph);
1776
+
1777
+ Transducer *t2 = &(*tmp || *cbt);
1778
+ delete tmp;
1779
+ delete cbt;
1780
+ tmp = t2;
1781
+ }
1782
+
1783
+ if (type == repl_up || type == repl_left) {
1784
+ // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1785
+ Transducer *lct = left_context(c->left, leftm, rightm, lower_alph);
1786
+
1787
+ Transducer *t2 = &(*tmp || *lct);
1788
+ delete tmp;
1789
+ delete lct;
1790
+ tmp = t2;
1791
+ }
1792
+
1793
+ if (type == repl_up || type == repl_right) {
1794
+ // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1795
+ Transducer *rct = right_context(c->right, leftm, rightm, lower_alph);
1796
+
1797
+ Transducer *t2 = &(*tmp || *rct);
1798
+ delete tmp;
1799
+ delete rct;
1800
+ tmp = t2;
1801
+ }
1802
+
1803
+ {
1804
+ // Apply the replacement transducer
1805
+ Transducer *t2 = &(*tmp || *rt);
1806
+ delete tmp;
1807
+ delete rt;
1808
+ tmp = t2;
1809
+ }
1810
+
1811
+ if (type == my_repl_down || type == repl_down || type == repl_right) {
1812
+ // left context transducer: .* (<R> >> (<L> >> $T$)) || !(.*<L>)
1813
+ Transducer *lct = left_context(c->left, leftm, rightm, upper_alph);
1814
+
1815
+ Transducer *t2 = &(*tmp || *lct);
1816
+ delete tmp;
1817
+ delete lct;
1818
+ tmp = t2;
1819
+ }
1820
+ if (type == my_repl_down || type == repl_down || type == repl_left) {
1821
+ // right context transducer: (<R> >> (<L> >> $T$)) .* || !(<R>.*)
1822
+ Transducer *rct = right_context(c->right, leftm, rightm, upper_alph);
1823
+
1824
+ Transducer *t2 = &(*tmp || *rct);
1825
+ delete tmp;
1826
+ delete rct;
1827
+ tmp = t2;
1828
+ }
1829
+
1830
+ if (type == my_repl_down) {
1831
+ // Create the constrain boundaries transducer !(.*<L><R>.*)
1832
+ Transducer *cbt=constrain_boundary_transducer(leftm, rightm, upper_alph);
1833
+
1834
+ Transducer *t2 = &(*tmp || *cbt);
1835
+ delete(tmp);
1836
+ delete(cbt);
1837
+ tmp = t2;
1838
+ }
1839
+
1840
+ Transducer *result = &(*tmp || *rbt);
1841
+ delete tmp;
1842
+ delete rbt;
1843
+
1844
+ // Remove the markers from the alphabet
1845
+ TheAlphabet.delete_markers();
1846
+
1847
+ free_contexts( c );
1848
+
1849
+ return result;
1850
+ }
1851
+
1852
+
1853
+ /*******************************************************************/
1854
+ /* */
1855
+ /* Interface::add_alphabet */
1856
+ /* */
1857
+ /*******************************************************************/
1858
+
1859
+ void Interface::add_alphabet( Transducer *t )
1860
+
1861
+ {
1862
+ t->alphabet.copy(TheAlphabet);
1863
+ t->complete_alphabet();
1864
+ }
1865
+
1866
+
1867
+ /*******************************************************************/
1868
+ /* */
1869
+ /* Interface::write_to_file */
1870
+ /* */
1871
+ /*******************************************************************/
1872
+
1873
+ void Interface::write_to_file( Transducer *t, char *filename)
1874
+
1875
+ {
1876
+ FILE *file;
1877
+ if ((file = fopen(filename,"wb")) == NULL) {
1878
+ fprintf(stderr,"\nError: Cannot open output file \"%s\"\n\n", filename);
1879
+ exit(1);
1880
+ }
1881
+ free( filename );
1882
+
1883
+ t = explode(t);
1884
+ add_alphabet(t);
1885
+ t = minimise(t);
1886
+ t->store(file);
1887
+ fclose(file);
1888
+ }
1889
+
1890
+
1891
+ /*******************************************************************/
1892
+ /* */
1893
+ /* Interface::result */
1894
+ /* */
1895
+ /*******************************************************************/
1896
+
1897
+ Transducer *Interface::result( Transducer *t, bool switch_flag )
1898
+
1899
+ {
1900
+ t = explode(t);
1901
+
1902
+ // delete the variable values
1903
+ vector<char*> s;
1904
+ for( VarMap::iterator it=VM.begin(); it != VM.end(); it++ ) {
1905
+ s.push_back(it->first);
1906
+ delete it->second;
1907
+ it->second = NULL;
1908
+ }
1909
+ VM.clear();
1910
+ for( size_t i=0; i<s.size(); i++ )
1911
+ free(s[i]);
1912
+ s.clear();
1913
+
1914
+ if (switch_flag)
1915
+ t = switch_levels(t);
1916
+ add_alphabet(t);
1917
+ t = minimise(t);
1918
+ return t;
1919
+ }
1920
+
1921
+ }